From de69c502f4d962c6787e56df7a83ab7c3a74c2f8 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sat, 30 Apr 2022 15:27:47 +0200 Subject: [PATCH 1/4] STY: Documentation, Variable names --- PyPDF2/_page.py | 61 ++++++++++++++++-------------- PyPDF2/_reader.py | 64 ++++++++++++++++++------------- PyPDF2/_writer.py | 43 +++++++++++---------- PyPDF2/constants.py | 4 +- PyPDF2/generic.py | 9 +---- PyPDF2/merger.py | 92 ++++++++++++++++++++++++--------------------- 6 files changed, 143 insertions(+), 130 deletions(-) diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py index 7bd158353..7e2eaa226 100644 --- a/PyPDF2/_page.py +++ b/PyPDF2/_page.py @@ -87,8 +87,9 @@ def createRectangleAccessor(name, fallback): class PageObject(DictionaryObject): """ - This class represents a single page within a PDF file. Typically this - object will be created by accessing the + PageObject represents a single page within a PDF file. + + Typically this object will be created by accessing the :meth:`getPage()` method of the :class:`PdfFileReader` class, but it is also possible to create an empty page with the @@ -107,7 +108,8 @@ def __init__(self, pdf=None, indirectRef=None): @staticmethod def createBlankPage(pdf=None, width=None, height=None): """ - Returns a new blank page. + Return a new blank page. + If ``width`` or ``height`` is ``None``, try to get the page size from the last page of *pdf*. @@ -142,7 +144,7 @@ def createBlankPage(pdf=None, width=None, height=None): def rotateClockwise(self, angle): """ - Rotates a page clockwise by increments of 90 degrees. + Rotate a page clockwise by increments of 90 degrees. :param int angle: Angle to rotate the page. Must be an increment of 90 deg. @@ -154,7 +156,7 @@ def rotateClockwise(self, angle): def rotateCounterClockwise(self, angle): """ - Rotates a page counter-clockwise by increments of 90 degrees. + Rotate a page counter-clockwise by increments of 90 degrees. :param int angle: Angle to rotate the page. Must be an increment of 90 deg. @@ -240,7 +242,7 @@ def _addTransformationMatrix(contents, pdf, ctm): def getContents(self): """ - Accesses the page contents. + Access the page contents. :return: the ``/Contents`` object, or ``None`` if it doesn't exist. ``/Contents`` is optional, as described in PDF Reference 7.7.3.3 @@ -252,7 +254,9 @@ def getContents(self): def mergePage(self, page2): """ - Merges the content streams of two pages into one. Resource references + Merge the content streams of two pages into one. + + Resource references (i.e. fonts) are maintained from both pages. The mediabox/cropbox/etc of this page are not altered. The parameter page's content stream will be added to the end of this page's content stream, meaning that it will @@ -388,8 +392,8 @@ def _mergePage(self, page2, page2transformation=None, ctm=None, expand=False): def mergeTransformedPage(self, page2, ctm, expand=False): """ - This is similar to mergePage, but a transformation matrix is - applied to the merged stream. + mergeTransformedPage is similar to mergePage, but a transformation + matrix is applied to the merged stream. :param PageObject page2: The page to be merged into this one. Should be an instance of :class:`PageObject`. @@ -409,8 +413,8 @@ def mergeTransformedPage(self, page2, ctm, expand=False): def mergeScaledPage(self, page2, scale, expand=False): """ - This is similar to mergePage, but the stream to be merged is scaled - by appling a transformation matrix. + mergeScaledPage is similar to mergePage, but the stream to be merged + is scaled by appling a transformation matrix. :param PageObject page2: The page to be merged into this one. Should be an instance of :class:`PageObject`. @@ -423,8 +427,8 @@ def mergeScaledPage(self, page2, scale, expand=False): def mergeRotatedPage(self, page2, rotation, expand=False): """ - This is similar to mergePage, but the stream to be merged is rotated - by appling a transformation matrix. + mergeRotatedPage is similar to mergePage, but the stream to be merged + is rotated by appling a transformation matrix. :param PageObject page2: the page to be merged into this one. Should be an instance of :class:`PageObject`. @@ -448,8 +452,8 @@ def mergeRotatedPage(self, page2, rotation, expand=False): def mergeTranslatedPage(self, page2, tx, ty, expand=False): """ - This is similar to mergePage, but the stream to be merged is translated - by appling a transformation matrix. + mergeTranslatedPage is similar to mergePage, but the stream to be + merged is translated by appling a transformation matrix. :param PageObject page2: the page to be merged into this one. Should be an instance of :class:`PageObject`. @@ -462,8 +466,8 @@ def mergeTranslatedPage(self, page2, tx, ty, expand=False): def mergeRotatedTranslatedPage(self, page2, rotation, tx, ty, expand=False): """ - This is similar to mergePage, but the stream to be merged is rotated - and translated by appling a transformation matrix. + mergeRotatedTranslatedPage is similar to mergePage, but the stream to + be merged is rotated and translated by appling a transformation matrix. :param PageObject page2: the page to be merged into this one. Should be an instance of :class:`PageObject`. @@ -493,8 +497,8 @@ def mergeRotatedTranslatedPage(self, page2, rotation, tx, ty, expand=False): def mergeRotatedScaledPage(self, page2, rotation, scale, expand=False): """ - This is similar to mergePage, but the stream to be merged is rotated - and scaled by appling a transformation matrix. + mergeRotatedScaledPage is similar to mergePage, but the stream to be + merged is rotated and scaled by appling a transformation matrix. :param PageObject page2: the page to be merged into this one. Should be an instance of :class:`PageObject`. @@ -520,8 +524,8 @@ def mergeRotatedScaledPage(self, page2, rotation, scale, expand=False): def mergeScaledTranslatedPage(self, page2, scale, tx, ty, expand=False): """ - This is similar to mergePage, but the stream to be merged is translated - and scaled by appling a transformation matrix. + mergeScaledTranslatedPage is similar to mergePage, but the stream to be + merged is translated and scaled by appling a transformation matrix. :param PageObject page2: the page to be merged into this one. Should be an instance of :class:`PageObject`. @@ -546,8 +550,9 @@ def mergeRotatedScaledTranslatedPage( self, page2, rotation, scale, tx, ty, expand=False ): """ - This is similar to mergePage, but the stream to be merged is translated, - rotated and scaled by appling a transformation matrix. + mergeRotatedScaledTranslatedPage is similar to mergePage, but the + stream to be merged is translated, rotated and scaled by appling a + transformation matrix. :param PageObject page2: the page to be merged into this one. Should be an instance of :class:`PageObject`. @@ -577,7 +582,7 @@ def mergeRotatedScaledTranslatedPage( def addTransformation(self, ctm): """ - Applies a transformation matrix to the page. + Apply a transformation matrix to the page. :param tuple ctm: A 6-element tuple containing the operands of the transformation matrix. @@ -592,7 +597,7 @@ def addTransformation(self, ctm): def scale(self, sx, sy): """ - Scales a page by the given factors by appling a transformation + Scale a page by the given factors by appling a transformation matrix to its content and updating the page size. :param float sx: The scaling factor on horizontal axis. @@ -630,7 +635,7 @@ def scale(self, sx, sy): def scaleBy(self, factor): """ - Scales a page by the given factor by appling a transformation + Scale a page by the given factor by appling a transformation matrix to its content and updating the page size. :param float factor: The scaling factor (for both X and Y axis). @@ -639,7 +644,7 @@ def scaleBy(self, factor): def scaleTo(self, width, height): """ - Scales a page to the specified dimentions by appling a + Scale a page to the specified dimentions by appling a transformation matrix to its content and updating the page size. :param float width: The new width. @@ -655,7 +660,7 @@ def scaleTo(self, width, height): def compressContentStreams(self): """ - Compresses the size of this page by joining all content streams and + Compress the size of this page by joining all content streams and applying a FlateDecode filter. However, it is possible that this function will perform no action if diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py index b3c50f387..a78928bb7 100644 --- a/PyPDF2/_reader.py +++ b/PyPDF2/_reader.py @@ -183,8 +183,10 @@ def producer_raw(self): class PdfFileReader(object): """ - Initializes a PdfFileReader object. This operation can take some time, as - the PDF stream's cross-reference tables are read into memory. + Initialize a PdfFileReader object. + + This operation can take some time, as the PDF stream's cross-reference + tables are read into memory. :param stream: A File object or an object that supports the standard read and seek methods similar to a File object. Could also be a @@ -201,8 +203,8 @@ class PdfFileReader(object): def __init__(self, stream, strict=True, warndest=None, overwriteWarnings=True): if overwriteWarnings: - # Have to dynamically override the default showwarning since there are no - # public methods that specify the 'file' parameter + # Have to dynamically override the default showwarning since there + # are no public methods that specify the 'file' parameter def _showwarning( message, category, filename, lineno, file=warndest, line=None ): @@ -227,7 +229,8 @@ def _showwarning( self._pageId2Num = None # map page IndirectRef number to Page Number if hasattr(stream, "mode") and "b" not in stream.mode: warnings.warn( - "PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", + "PdfFileReader stream/file object is not in binary mode. " + "It may not be read correctly.", PdfReadWarning, ) if isString(stream): @@ -240,13 +243,14 @@ def _showwarning( def getDocumentInfo(self): """ - Retrieves the PDF file's document information dictionary, if it exists. + Retrieve the PDF file's document information dictionary, if it exists. Note that some PDF files use metadata streams instead of docinfo dictionaries, and these metadata streams will not be accessed by this function. :return: the document information of this PDF file - :rtype: :class:`DocumentInformation` or ``None`` if none exists. + :rtype: :class:`DocumentInformation` or + ``None`` if none exists. """ if TK.INFO not in self.trailer: return None @@ -257,12 +261,15 @@ def getDocumentInfo(self): @property def documentInfo(self): - """Read-only property that accesses the :meth:`getDocumentInfo()` function.""" + """ + Read-only property that accesses the + :meth:`getDocumentInfo()` function. + """ return self.getDocumentInfo() def getXmpMetadata(self): """ - Retrieves XMP (Extensible Metadata Platform) data from the PDF document + Retrieve XMP (Extensible Metadata Platform) data from the PDF document root. :return: a :class:`XmpInformation` @@ -505,7 +512,7 @@ def outlines(self): def getOutlines(self, node=None, outlines=None): """ - Retrieves the document outline present in the document. + Retrieve the document outline present in the document. :return: a nested list of :class:`Destinations`. """ @@ -634,6 +641,7 @@ def pages(self): def getPageLayout(self): """ Get the page layout. + See :meth:`setPageLayout()` for a description of valid layouts. @@ -865,7 +873,6 @@ def readObjectHeader(self, stream): readNonWhitespace(stream) stream.seek(-1, 1) if extra and self.strict: - # not a fatal error warnings.warn( "Superfluous whitespace found in object header %s %s" % (idnum, generation), @@ -878,7 +885,6 @@ def cacheGetIndirectObject(self, generation, idnum): return out def cacheIndirectObject(self, generation, idnum, obj): - # return None # Sometimes we want to turn off cache for debugging. if (generation, idnum) in self.resolvedObjects: msg = "Overwriting cache for %s %s" % (generation, idnum) if self.strict: @@ -910,20 +916,7 @@ def read(self, stream): raise PdfReadError("EOF marker not found") line = self.readNextEndLine(stream) - # find startxref entry - the location of the xref table - line = self.readNextEndLine(stream) - try: - startxref = int(line) - except ValueError: - # 'startxref' may be on the same line as the location - if not line.startswith(b_("startxref")): - raise PdfReadError("startxref not found") - startxref = int(line[9:].strip()) - warnings.warn("startxref on same line as offset") - else: - line = self.readNextEndLine(stream) - if line[:9] != b_("startxref"): - raise PdfReadError("startxref not found") + startxref = self._find_startxref_pos(stream) # check and eventually correct the startxref only in not strict xref_issue_nr = self._get_xref_issues(stream, startxref) @@ -1022,6 +1015,23 @@ def read(self, stream): # non-zero-index is actually correct stream.seek(loc, 0) # return to where it was + def _find_startxref_pos(self, stream): + """Find startxref entry - the location of the xref table""" + line = self.readNextEndLine(stream) + try: + startxref = int(line) + except ValueError: + # 'startxref' may be on the same line as the location + if not line.startswith(b_("startxref")): + raise PdfReadError("startxref not found") + startxref = int(line[9:].strip()) + warnings.warn("startxref on same line as offset") + else: + line = self.readNextEndLine(stream) + if line[:9] != b_("startxref"): + raise PdfReadError("startxref not found") + return startxref + def _read_standard_xref_table(self, stream): # standard cross-reference table ref = stream.read(4) @@ -1133,7 +1143,7 @@ def used_before(num, generation): @staticmethod def _get_xref_issues(stream, startxref): - """Returns an int which indicates an issue. 0 means there is no issue.""" + """Return an int which indicates an issue. 0 means there is no issue.""" stream.seek(startxref - 1, 0) # -1 to check character before line = stream.read(1) if line not in b_("\r\n \t"): diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index d5d0a6f45..5df96df89 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -152,7 +152,7 @@ def set_need_appearances_writer(self): def addPage(self, page): """ - Adds a page to this PDF file. The page is usually acquired from a + Add a page to this PDF file. The page is usually acquired from a :class:`PdfFileReader` instance. :param PageObject page: The page to add to the document. Should be @@ -173,7 +173,7 @@ def insertPage(self, page, index=0): def getPage(self, pageNumber): """ - Retrieves a page by number from this PDF file. + Retrieve a page by number from this PDF file. :param int pageNumber: The page number to retrieve (pages begin at zero) @@ -194,7 +194,7 @@ def getNumPages(self): def addBlankPage(self, width=None, height=None): """ - Appends a blank page to this PDF file and returns it. If no page size + Append a blank page to this PDF file and returns it. If no page size is specified, use the size of the last page. :param float width: The width of the new page expressed in default user @@ -212,7 +212,7 @@ def addBlankPage(self, width=None, height=None): def insertBlankPage(self, width=None, height=None, index=0): """ - Inserts a blank page to this PDF file and returns it. If no page size + Insert a blank page to this PDF file and returns it. If no page size is specified, use the size of the last page. :param float width: The width of the new page expressed in default user @@ -496,7 +496,7 @@ def encrypt(self, user_pwd, owner_pwd=None, use_128bit=True, permissions_flag=-1 def write(self, stream): """ - Writes the collection of pages added to this object out as a PDF file. + Write the collection of pages added to this object out as a PDF file. :param stream: An object to write the file to. The object must support the write method and the tell method, similar to a file object. @@ -865,9 +865,7 @@ def addNamedDestination(self, title, pagenum): return dest_ref def removeLinks(self): - """ - Removes links and annotations from this output. - """ + """Remove links and annotations from this output.""" pages = self.getObject(self._pages)[PA.KIDS] for page in pages: page_ref = self.getObject(page) @@ -876,7 +874,7 @@ def removeLinks(self): def removeImages(self, ignoreByteStringObject=False): """ - Removes images from this output. + Remove images from this output. :param bool ignoreByteStringObject: optional parameter to ignore ByteString Objects. @@ -952,7 +950,7 @@ def removeImages(self, ignoreByteStringObject=False): def removeText(self, ignoreByteStringObject=False): """ - Removes text from this output. + Remove text from this output. :param bool ignoreByteStringObject: optional parameter to ignore ByteString Objects. @@ -1058,18 +1056,18 @@ def addURI(self, pagenum, uri, rect, border=None): def addLink(self, pagenum, pagedest, rect, border=None, fit="/Fit", *args): """ - Add an internal link from a rectangular area to the specified page. - - :param int pagenum: index of the page on which to place the link. - :param int pagedest: index of the page to which the link should go. - :param rect: :class:`RectangleObject` or array of four - integers specifying the clickable rectangular area - ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``. - :param border: if provided, an array describing border-drawing - properties. See the PDF spec for details. No border will be - drawn if this argument is omitted. - :param str fit: Page fit or 'zoom' option (see below). Additional arguments may need - to be supplied. Passing ``None`` will be read as a null value for that coordinate. + Add an internal link from a rectangular area to the specified page. + + :param int pagenum: index of the page on which to place the link. + :param int pagedest: index of the page to which the link should go. + :param rect: :class:`RectangleObject` or array of four + integers specifying the clickable rectangular area + ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``. + :param border: if provided, an array describing border-drawing + properties. See the PDF spec for details. No border will be + drawn if this argument is omitted. + :param str fit: Page fit or 'zoom' option (see below). Additional arguments may need + to be supplied. Passing ``None`` will be read as a null value for that coordinate. .. list-table:: Valid ``zoom`` arguments (see Table 8.2 of the PDF 1.7 reference for details) :widths: 50 200 @@ -1155,6 +1153,7 @@ def addLink(self, pagenum, pagedest, rect, border=None, fit="/Fit", *args): def getPageLayout(self): """ Get the page layout. + See :meth:`setPageLayout()` for a description of valid layouts. :return: Page layout currently being used. diff --git a/PyPDF2/constants.py b/PyPDF2/constants.py index 9bf0fb712..85c563375 100644 --- a/PyPDF2/constants.py +++ b/PyPDF2/constants.py @@ -88,9 +88,7 @@ class FilterTypes: class FilterTypeAbbreviations: - """ - Table 4.44 of the 1.7 Manual (page 353ff) - """ + """Table 4.44 of the 1.7 Manual (page 353ff)""" AHx = "/AHx" A85 = "/A85" diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index f175439bf..a505f04f1 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -81,7 +81,6 @@ def readObject(stream, pdf): stream.seek(-1, 1) # reset to start idx = ObjectPrefix.find(tok) if idx == 0: - # name object return NameObject.readFromStream(stream, pdf) elif idx == 1: # hexadecimal string OR dictionary @@ -93,16 +92,12 @@ def readObject(stream, pdf): else: return readHexStringFromStream(stream) elif idx == 2: - # array object return ArrayObject.readFromStream(stream, pdf) elif idx == 3 or idx == 4: - # boolean object return BooleanObject.readFromStream(stream) elif idx == 5: - # string object return readStringFromStream(stream) elif idx == 6: - # null object return NullObject.readFromStream(stream) elif idx == 7: # comment @@ -127,7 +122,7 @@ def readObject(stream, pdf): class PdfObject(object): def getObject(self): - """Resolves indirect references.""" + """Resolve indirect references.""" return self @@ -561,7 +556,7 @@ def __getitem__(self, key): def getXmpMetadata(self): """ - Retrieves XMP (Extensible Metadata Platform) data relevant to the + Retrieve XMP (Extensible Metadata Platform) data relevant to the this object, if available. Stability: Added in v1.12, will exist for all future v1.x releases. diff --git a/PyPDF2/merger.py b/PyPDF2/merger.py index 2430a0d34..0e6636e51 100644 --- a/PyPDF2/merger.py +++ b/PyPDF2/merger.py @@ -384,79 +384,85 @@ def _write_bookmarks(self, bookmarks=None, parent=None): bookmarks = self.bookmarks last_added = None - for b in bookmarks: - if isinstance(b, list): - self._write_bookmarks(b, last_added) + for bookmark in bookmarks: + if isinstance(bookmark, list): + self._write_bookmarks(bookmark, last_added) continue - pageno = None + page_no = None pdf = None - if "/Page" in b: - for i, p in enumerate(self.pages): - if p.id == b["/Page"]: - pageno, pdf = self._write_bookmark_on_page(b, p, i) + if "/Page" in bookmark: + for page_no, page in enumerate(self.pages): + if page.id == bookmark["/Page"]: + pdf = self._write_bookmark_on_page(bookmark, page) break - if pageno is not None: - del b["/Page"], b["/Type"] - last_added = self.output.addBookmarkDict(b, parent) + if page_no is not None: + del bookmark["/Page"], bookmark["/Type"] + last_added = self.output.addBookmarkDict(bookmark, parent) - def _write_bookmark_on_page(self, b, p, i): + def _write_bookmark_on_page(self, bookmark, page): # b[NameObject('/Page')] = p.out_pagedata - args = [NumberObject(p.id), NameObject(b["/Type"])] + args = [NumberObject(page.id), NameObject(bookmark["/Type"])] # nothing more to add # if b['/Type'] == '/Fit' or b['/Type'] == '/FitB' - if b["/Type"] == "/FitH" or b["/Type"] == "/FitBH": - if "/Top" in b and not isinstance(b["/Top"], NullObject): - args.append(FloatObject(b["/Top"])) + if bookmark["/Type"] == "/FitH" or bookmark["/Type"] == "/FitBH": + if "/Top" in bookmark and not isinstance(bookmark["/Top"], NullObject): + args.append(FloatObject(bookmark["/Top"])) else: args.append(FloatObject(0)) - del b["/Top"] - elif b["/Type"] == "/FitV" or b["/Type"] == "/FitBV": - if "/Left" in b and not isinstance(b["/Left"], NullObject): - args.append(FloatObject(b["/Left"])) + del bookmark["/Top"] + elif bookmark["/Type"] == "/FitV" or bookmark["/Type"] == "/FitBV": + if "/Left" in bookmark and not isinstance(bookmark["/Left"], NullObject): + args.append(FloatObject(bookmark["/Left"])) else: args.append(FloatObject(0)) - del b["/Left"] - elif b["/Type"] == "/XYZ": - if "/Left" in b and not isinstance(b["/Left"], NullObject): - args.append(FloatObject(b["/Left"])) + del bookmark["/Left"] + elif bookmark["/Type"] == "/XYZ": + if "/Left" in bookmark and not isinstance(bookmark["/Left"], NullObject): + args.append(FloatObject(bookmark["/Left"])) else: args.append(FloatObject(0)) - if "/Top" in b and not isinstance(b["/Top"], NullObject): - args.append(FloatObject(b["/Top"])) + if "/Top" in bookmark and not isinstance(bookmark["/Top"], NullObject): + args.append(FloatObject(bookmark["/Top"])) else: args.append(FloatObject(0)) - if "/Zoom" in b and not isinstance(b["/Zoom"], NullObject): - args.append(FloatObject(b["/Zoom"])) + if "/Zoom" in bookmark and not isinstance(bookmark["/Zoom"], NullObject): + args.append(FloatObject(bookmark["/Zoom"])) else: args.append(FloatObject(0)) - del b["/Top"], b["/Zoom"], b["/Left"] - elif b["/Type"] == "/FitR": - if "/Left" in b and not isinstance(b["/Left"], NullObject): - args.append(FloatObject(b["/Left"])) + del bookmark["/Top"], bookmark["/Zoom"], bookmark["/Left"] + elif bookmark["/Type"] == "/FitR": + if "/Left" in bookmark and not isinstance(bookmark["/Left"], NullObject): + args.append(FloatObject(bookmark["/Left"])) else: args.append(FloatObject(0)) - if "/Bottom" in b and not isinstance(b["/Bottom"], NullObject): - args.append(FloatObject(b["/Bottom"])) + if "/Bottom" in bookmark and not isinstance( + bookmark["/Bottom"], NullObject + ): + args.append(FloatObject(bookmark["/Bottom"])) else: args.append(FloatObject(0)) - if "/Right" in b and not isinstance(b["/Right"], NullObject): - args.append(FloatObject(b["/Right"])) + if "/Right" in bookmark and not isinstance(bookmark["/Right"], NullObject): + args.append(FloatObject(bookmark["/Right"])) else: args.append(FloatObject(0)) - if "/Top" in b and not isinstance(b["/Top"], NullObject): - args.append(FloatObject(b["/Top"])) + if "/Top" in bookmark and not isinstance(bookmark["/Top"], NullObject): + args.append(FloatObject(bookmark["/Top"])) else: args.append(FloatObject(0)) - del b["/Left"], b["/Right"], b["/Bottom"], b["/Top"] + del ( + bookmark["/Left"], + bookmark["/Right"], + bookmark["/Bottom"], + bookmark["/Top"], + ) - b[NameObject("/A")] = DictionaryObject( + bookmark[NameObject("/A")] = DictionaryObject( {NameObject("/S"): NameObject("/GoTo"), NameObject("/D"): ArrayObject(args)} ) - pageno = i - pdf = p.src # noqa: F841 - return (pageno, pdf) + pdf = page.src # noqa: F841 + return pdf def _associate_dests_to_pages(self, pages): for nd in self.named_dests: From a37c0ffc8ae8696f2d269a9568e8ac71d9086531 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sat, 30 Apr 2022 16:18:34 +0200 Subject: [PATCH 2/4] pytest style --- PyPDF2/merger.py | 59 ++++++++++++++++++++++------------------ Tests/test_filters.py | 12 ++++---- Tests/test_generic.py | 6 ++-- Tests/test_javascript.py | 2 +- Tests/test_page.py | 6 ++-- Tests/test_pagerange.py | 8 +++--- Tests/test_papersizes.py | 3 +- Tests/test_reader.py | 22 +++++++-------- Tests/test_utils.py | 11 ++++---- Tests/test_writer.py | 4 +-- Tests/test_xmp.py | 2 +- 11 files changed, 72 insertions(+), 63 deletions(-) diff --git a/PyPDF2/merger.py b/PyPDF2/merger.py index 0e6636e51..b8ecccee8 100644 --- a/PyPDF2/merger.py +++ b/PyPDF2/merger.py @@ -60,9 +60,9 @@ def __init__(self, pagedata, src, id): class PdfFileMerger(object): """ - Initializes a ``PdfFileMerger`` object. ``PdfFileMerger`` merges multiple PDFs - into a single PDF. It can concatenate, slice, insert, or any combination - of the above. + Initializes a ``PdfFileMerger`` object. ``PdfFileMerger`` merges multiple + PDFs into a single PDF. It can concatenate, slice, insert, or any + combination of the above. See the functions :meth:`merge()` (or :meth:`append()`) and :meth:`write()` for usage information. @@ -95,19 +95,21 @@ def merge( :param int position: The *page number* to insert this file. File will be inserted after the given number. - :param fileobj: A File Object or an object that supports the standard read - and seek methods similar to a File Object. Could also be a + :param fileobj: A File Object or an object that supports the standard + read and seek methods similar to a File Object. Could also be a string representing a path to a PDF file. - :param str bookmark: Optionally, you may specify a bookmark to be applied at - the beginning of the included file by supplying the text of the bookmark. + :param str bookmark: Optionally, you may specify a bookmark to be + applied at the beginning of the included file by supplying the text + of the bookmark. - :param pages: can be a :class:`PageRange` or a ``(start, stop[, step])`` tuple + :param pages: can be a :class:`PageRange` + or a ``(start, stop[, step])`` tuple to merge only the specified range of pages from the source document into the output document. - :param bool import_bookmarks: You may prevent the source document's bookmarks - from being imported by specifying this as ``False``. + :param bool import_bookmarks: You may prevent the source document's + bookmarks from being imported by specifying this as ``False``. """ # This parameter is passed to self.inputs.append and means @@ -135,7 +137,10 @@ def merge( orig_tell = fileobj.stream.tell() fileobj.stream.seek(0) filecontent = StreamIO(fileobj.stream.read()) - fileobj.stream.seek(orig_tell) # reset the stream to its original location + + # reset the stream to its original location + fileobj.stream.seek(orig_tell) + fileobj = filecontent my_file = True @@ -199,22 +204,25 @@ def merge( def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True): """ - Identical to the :meth:`merge()` method, but assumes you want to concatenate - all pages onto the end of the file instead of specifying a position. + Identical to the :meth:`merge()` method, but assumes you want to + concatenate all pages onto the end of the file instead of specifying a + position. - :param fileobj: A File Object or an object that supports the standard read - and seek methods similar to a File Object. Could also be a + :param fileobj: A File Object or an object that supports the standard + read and seek methods similar to a File Object. Could also be a string representing a path to a PDF file. - :param str bookmark: Optionally, you may specify a bookmark to be applied at - the beginning of the included file by supplying the text of the bookmark. + :param str bookmark: Optionally, you may specify a bookmark to be + applied at the beginning of the included file by supplying the text + of the bookmark. - :param pages: can be a :class:`PageRange` or a ``(start, stop[, step])`` tuple + :param pages: can be a :class:`PageRange` + or a ``(start, stop[, step])`` tuple to merge only the specified range of pages from the source document into the output document. - :param bool import_bookmarks: You may prevent the source document's bookmarks - from being imported by specifying this as ``False``. + :param bool import_bookmarks: You may prevent the source document's + bookmarks from being imported by specifying this as ``False``. """ self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks) @@ -231,7 +239,8 @@ def write(self, fileobj): my_file = True # Add pages to the PdfFileWriter - # The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13 + # The commented out line below was replaced with the two lines below it + # to allow PdfFileMerger to work with PyPdf 1.13 for page in self.pages: self.output.addPage(page.pagedata) page.out_pagedata = self.output.getReference( @@ -390,11 +399,10 @@ def _write_bookmarks(self, bookmarks=None, parent=None): continue page_no = None - pdf = None if "/Page" in bookmark: - for page_no, page in enumerate(self.pages): + for page_no, page in enumerate(self.pages): # noqa: B007 if page.id == bookmark["/Page"]: - pdf = self._write_bookmark_on_page(bookmark, page) + self._write_bookmark_on_page(bookmark, page) break if page_no is not None: del bookmark["/Page"], bookmark["/Type"] @@ -461,9 +469,6 @@ def _write_bookmark_on_page(self, bookmark, page): {NameObject("/S"): NameObject("/GoTo"), NameObject("/D"): ArrayObject(args)} ) - pdf = page.src # noqa: F841 - return pdf - def _associate_dests_to_pages(self, pages): for nd in self.named_dests: pageno = None diff --git a/Tests/test_filters.py b/Tests/test_filters.py index 274de60e3..7ccbb5e8e 100644 --- a/Tests/test_filters.py +++ b/Tests/test_filters.py @@ -19,7 +19,9 @@ ) -@pytest.mark.parametrize("predictor, s", list(cartesian_product([1], filter_inputs))) +@pytest.mark.parametrize( + ("predictor", "s"), list(cartesian_product([1], filter_inputs)) +) def test_FlateDecode(predictor, s): """ Tests FlateDecode decode() and encode() methods. @@ -46,7 +48,7 @@ def test_FlateDecode_unsupported_predictor(): @pytest.mark.parametrize( - "input,expected", + ("input", "expected"), [ (">", ""), ( @@ -80,7 +82,7 @@ def test_FlateDecode_unsupported_predictor(): "whitespace", ], ) -@pytest.mark.no_py27 +@pytest.mark.no_py27() def test_ASCIIHexDecode(input, expected): """ Feeds a bunch of values to ASCIIHexDecode.decode() and ensures the @@ -100,7 +102,7 @@ def test_ASCIIHexDecode_no_eod(): assert exc.value.args[0] == "Unexpected EOD in ASCIIHexDecode" -@pytest.mark.xfail +@pytest.mark.xfail() def test_ASCII85Decode_with_overflow(): inputs = ( v + "~>" @@ -119,7 +121,7 @@ def test_ASCII85Decode_with_overflow(): assert exc.value.args[0] == "" -@pytest.mark.no_py27 +@pytest.mark.no_py27() def test_ASCII85Decode_five_zero_bytes(): """ From ISO 32000 (2008) ยง7.4.3: diff --git a/Tests/test_generic.py b/Tests/test_generic.py index c54237b03..18cad22db 100644 --- a/Tests/test_generic.py +++ b/Tests/test_generic.py @@ -43,7 +43,7 @@ def test_createStringObject_exception(): @pytest.mark.parametrize( - "value, expected, tell", [(b"true", b"true", 4), (b"false", b"false", 5)] + ("value", "expected", "tell"), [(b"true", b"true", 4), (b"false", b"false", 5)] ) def test_boolean_object(value, expected, tell): stream = BytesIO(value) @@ -179,7 +179,7 @@ def test_bookmark_write_to_stream(): assert stream.read() == b"<<\n/Title title\n/Dest [ /FitV 0 ]\n>>" -@pytest.mark.no_py27 +@pytest.mark.no_py27() def test_encode_pdfdocencoding_keyerror(): with pytest.raises(UnicodeEncodeError) as exc: encode_pdfdocencoding("๐Ÿ˜€") @@ -201,7 +201,7 @@ def test_readObject_comment(): assert out == 1 -@pytest.mark.no_py27 +@pytest.mark.no_py27() def test_ByteStringObject(): bo = ByteStringObject("stream", encoding="utf-8") stream = BytesIO(b"") diff --git a/Tests/test_javascript.py b/Tests/test_javascript.py index 48ad9530e..f9b2187fc 100644 --- a/Tests/test_javascript.py +++ b/Tests/test_javascript.py @@ -10,7 +10,7 @@ RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources") -@pytest.fixture +@pytest.fixture() def pdf_file_writer(): reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf")) writer = PdfFileWriter() diff --git a/Tests/test_page.py b/Tests/test_page.py index d547ea91f..2f83fd4b1 100644 --- a/Tests/test_page.py +++ b/Tests/test_page.py @@ -22,7 +22,7 @@ def get_all_sample_files(): all_files_meta = get_all_sample_files() -@pytest.mark.external +@pytest.mark.external() @pytest.mark.parametrize( "meta", [m for m in all_files_meta["data"] if not m["encrypted"]], @@ -36,7 +36,7 @@ def test_read(meta): @pytest.mark.parametrize( - "pdf_path, password", + ("pdf_path", "password"), [ ("crazyones.pdf", None), ("attachment.pdf", None), @@ -76,7 +76,7 @@ def test_page_operations(pdf_path, password): @pytest.mark.parametrize( - "pdf_path, password", + ("pdf_path", "password"), [ (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), None), (os.path.join(RESOURCE_ROOT, "attachment.pdf"), None), diff --git a/Tests/test_pagerange.py b/Tests/test_pagerange.py index 4fffe238c..1a7058280 100644 --- a/Tests/test_pagerange.py +++ b/Tests/test_pagerange.py @@ -10,7 +10,7 @@ def test_equality(): @pytest.mark.parametrize( - "page_range,expected", + ("page_range", "expected"), [(slice(0, 5), "0:5"), (slice(0, 5, 2), "0:5:2"), ("-1", "-1:"), ("0", "0")], ) def test_str(page_range, expected): @@ -18,7 +18,7 @@ def test_str(page_range, expected): @pytest.mark.parametrize( - "page_range,expected", + ("page_range", "expected"), [(slice(0, 5), "PageRange('0:5')"), (slice(0, 5, 2), "PageRange('0:5:2')")], ) def test_repr(page_range, expected): @@ -38,7 +38,7 @@ def test_idempotency(): @pytest.mark.parametrize( - "range_str,expected", + ("range_str", "expected"), [ ("42", slice(42, 43)), ("1:2", slice(1, 2)), @@ -59,7 +59,7 @@ def test_str_init_error(): @pytest.mark.parametrize( - "params,expected", + ("params", "expected"), [ (["foo.pdf", "1:5"], [("foo.pdf", PageRange("1:5"))]), ( diff --git a/Tests/test_papersizes.py b/Tests/test_papersizes.py index aeb4e1acd..35ac2acea 100644 --- a/Tests/test_papersizes.py +++ b/Tests/test_papersizes.py @@ -24,7 +24,8 @@ def test_din_a_ratio(dimensions): @pytest.mark.parametrize( - "dimensions_a, dimensions_b", list(zip(papersizes._din_a, papersizes._din_a[1:])) + ("dimensions_a", "dimensions_b"), + list(zip(papersizes._din_a, papersizes._din_a[1:])), ) def test_din_a_doubling(dimensions_a, dimensions_b): assert abs(dimensions_a.height - 2 * dimensions_b.width) <= 4 diff --git a/Tests/test_reader.py b/Tests/test_reader.py index 00bc81c73..6b4782680 100644 --- a/Tests/test_reader.py +++ b/Tests/test_reader.py @@ -27,7 +27,8 @@ @pytest.mark.parametrize( - "src,num_pages", [("selenium-PyPDF2-issue-177.pdf", 1), ("pdflatex-outline.pdf", 4)] + ("src", "num_pages"), + [("selenium-PyPDF2-issue-177.pdf", 1), ("pdflatex-outline.pdf", 4)], ) def test_get_num_pages(src, num_pages): src = os.path.join(RESOURCE_ROOT, src) @@ -36,7 +37,7 @@ def test_get_num_pages(src, num_pages): @pytest.mark.parametrize( - "pdf_path, expected", + ("pdf_path", "expected"), [ ( os.path.join(RESOURCE_ROOT, "crazyones.pdf"), @@ -129,7 +130,7 @@ def test_get_attachments(src): @pytest.mark.parametrize( - "src,outline_elements", + ("src", "outline_elements"), [ (os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf"), 9), (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), 0), @@ -142,7 +143,7 @@ def test_get_outlines(src, outline_elements): @pytest.mark.parametrize( - "src,nb_images", + ("src", "nb_images"), [ ("pdflatex-outline.pdf", 0), ("crazyones.pdf", 0), @@ -184,7 +185,7 @@ def test_get_images(src, nb_images): @pytest.mark.parametrize( - "strict,with_prev_0,startx_correction,should_fail", + ("strict", "with_prev_0", "startx_correction", "should_fail"), [ (True, False, -1, False), # all nominal => no fail (True, True, -1, True), # Prev=0 => fail expected @@ -250,7 +251,6 @@ def test_issue297(): path = os.path.join(RESOURCE_ROOT, "issue-297.pdf") with pytest.raises(PdfReadError) as exc: reader = PdfFileReader(path, strict=True) - reader.getPage(0) assert "Broken xref table" in exc.value.args[0] reader = PdfFileReader(path, strict=False) reader.getPage(0) @@ -273,7 +273,7 @@ def test_get_page_of_encrypted_file(): @pytest.mark.parametrize( - "src,expected,expected_get_fields", + ("src", "expected", "expected_get_fields"), [ ( "form.pdf", @@ -324,7 +324,7 @@ def test_get_form(src, expected, expected_get_fields): @pytest.mark.parametrize( - "src,page_nb", + ("src", "page_nb"), [ ("form.pdf", 0), ("pdflatex-outline.pdf", 2), @@ -338,7 +338,7 @@ def test_get_page_number(src, page_nb): @pytest.mark.parametrize( - "src,expected", + ("src", "expected"), [ ("form.pdf", None), ], @@ -350,7 +350,7 @@ def test_get_page_layout(src, expected): @pytest.mark.parametrize( - "src,expected", + ("src", "expected"), [ ("form.pdf", "/UseNone"), ("crazyones.pdf", None), @@ -483,8 +483,8 @@ def test_read_unknown_zero_pages(): pdf_data.find(b"xref") - 1, ) pdf_stream = io.BytesIO(pdf_data) + reader = PdfFileReader(pdf_stream, strict=True) with pytest.raises(PdfReadError) as exc: - reader = PdfFileReader(pdf_stream, strict=True) reader.numPages assert exc.value.args[0] == "Could not find object." diff --git a/Tests/test_utils.py b/Tests/test_utils.py index 67308b501..db7727a4a 100644 --- a/Tests/test_utils.py +++ b/Tests/test_utils.py @@ -14,7 +14,8 @@ @pytest.mark.parametrize( - "value,expected", [(0, True), (-1, True), (1, True), ("1", False), (1.5, False)] + ("value", "expected"), + [(0, True), (-1, True), (1, True), ("1", False), (1.5, False)], ) def test_isInt(value, expected): assert PyPDF2.utils.isInt(value) == expected @@ -25,7 +26,7 @@ def test_isBytes(): @pytest.mark.parametrize( - "stream,expected", + ("stream", "expected"), [ (io.BytesIO(b"foo"), False), (io.BytesIO(b""), False), @@ -44,7 +45,7 @@ def test_readUntilWhitespace(): @pytest.mark.parametrize( - "stream,remainder", + ("stream", "remainder"), [ (io.BytesIO(b"% foobar\n"), b""), (io.BytesIO(b""), b""), @@ -74,7 +75,7 @@ def test_readUntilRegex_premature_ending_name(): @pytest.mark.parametrize( - "a,b,expected", + ("a", "b", "expected"), [ ([[3]], [[7]], [[21]]), ([[3, 7]], [[5], [13]], [[3 * 5.0 + 7 * 13]]), @@ -103,7 +104,7 @@ def test_hexStr(): assert PyPDF2.utils.hexStr(10) == "0xa" -@pytest.mark.no_py27 +@pytest.mark.no_py27() def test_b(): assert PyPDF2.utils.b_("foo") == b"foo" assert PyPDF2.utils.b_("๐Ÿ˜€") == "๐Ÿ˜€".encode("utf-8") diff --git a/Tests/test_writer.py b/Tests/test_writer.py index 4e8013b1f..31b1aebdc 100644 --- a/Tests/test_writer.py +++ b/Tests/test_writer.py @@ -72,7 +72,7 @@ def test_writer_operations(): @pytest.mark.parametrize( - "input_path,ignoreByteStringObject", + ("input_path", "ignoreByteStringObject"), [ ("side-by-side-subfig.pdf", False), ("reportlab-inline-image.pdf", True), @@ -104,7 +104,7 @@ def test_remove_images(input_path, ignoreByteStringObject): @pytest.mark.parametrize( - "input_path,ignoreByteStringObject", + ("input_path", "ignoreByteStringObject"), [ ("side-by-side-subfig.pdf", False), ("side-by-side-subfig.pdf", True), diff --git a/Tests/test_xmp.py b/Tests/test_xmp.py index e0aed7900..a360faf98 100644 --- a/Tests/test_xmp.py +++ b/Tests/test_xmp.py @@ -11,7 +11,7 @@ @pytest.mark.parametrize( - "src,has_xmp", + ("src", "has_xmp"), [ (os.path.join(RESOURCE_ROOT, "commented-xmp.pdf"), True), (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), False), From cee15f00a36ac5141b168369feebb8f954527c50 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sat, 30 Apr 2022 17:56:35 +0200 Subject: [PATCH 3/4] constants --- PyPDF2/_page.py | 42 ++++++++--------- PyPDF2/_reader.py | 34 +++++++------- PyPDF2/_writer.py | 11 +++-- PyPDF2/constants.py | 107 +++++++++++++++++++++++++++++++++++++------ PyPDF2/filters.py | 40 ++++++++-------- PyPDF2/merger.py | 17 +++---- PyPDF2/xmp.py | 16 +++---- Tests/test_reader.py | 2 +- 8 files changed, 174 insertions(+), 95 deletions(-) diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py index 7e2eaa226..d4697570f 100644 --- a/PyPDF2/_page.py +++ b/PyPDF2/_page.py @@ -126,8 +126,8 @@ def createBlankPage(pdf=None, width=None, height=None): page = PageObject(pdf) # Creates a new page (cf PDF Reference 7.7.3.3) - page.__setitem__(NameObject("/Type"), NameObject("/Page")) - page.__setitem__(NameObject("/Parent"), NullObject()) + page.__setitem__(NameObject(PG.TYPE), NameObject("/Page")) + page.__setitem__(NameObject(PG.PARENT), NullObject()) page.__setitem__(NameObject(PG.RESOURCES), DictionaryObject()) if width is None or height is None: if pdf is not None and pdf.getNumPages() > 0: @@ -167,11 +167,11 @@ def rotateCounterClockwise(self, angle): return self def _rotate(self, angle): - rotate_obj = self.get("/Rotate", 0) + rotate_obj = self.get(PG.ROTATE, 0) current_angle = ( rotate_obj if isinstance(rotate_obj, int) else rotate_obj.getObject() ) - self[NameObject("/Rotate")] = NumberObject(current_angle + angle) + self[NameObject(PG.ROTATE)] = NumberObject(current_angle + angle) @staticmethod def _mergeResources(res1, res2, resource): @@ -247,8 +247,8 @@ def getContents(self): :return: the ``/Contents`` object, or ``None`` if it doesn't exist. ``/Contents`` is optional, as described in PDF Reference 7.7.3.3 """ - if "/Contents" in self: - return self["/Contents"].getObject() + if PG.CONTENTS in self: + return self[PG.CONTENTS].getObject() else: return None @@ -286,13 +286,13 @@ def _mergePage(self, page2, page2transformation=None, ctm=None, expand=False): new_annots.append(ref) for res in ( - "/ExtGState", + RES.EXT_G_STATE, RES.FONT, RES.XOBJECT, RES.COLOR_SPACE, - "/Pattern", - "/Shading", - "/Properties", + RES.PATTERN, + RES.SHADING, + RES.PROPERTIES, ): new, newrename = PageObject._mergeResources( original_resources, page2resources, res @@ -302,11 +302,11 @@ def _mergePage(self, page2, page2transformation=None, ctm=None, expand=False): rename.update(newrename) # Combine /ProcSet sets. - new_resources[NameObject(RES.PROCSET)] = ArrayObject( + new_resources[NameObject(RES.PROC_SET)] = ArrayObject( frozenset( - original_resources.get(RES.PROCSET, ArrayObject()).getObject() + original_resources.get(RES.PROC_SET, ArrayObject()).getObject() ).union( - frozenset(page2resources.get(RES.PROCSET, ArrayObject()).getObject()) + frozenset(page2resources.get(RES.PROC_SET, ArrayObject()).getObject()) ) ) @@ -386,7 +386,7 @@ def _mergePage(self, page2, page2transformation=None, ctm=None, expand=False): self.mediaBox.setLowerLeft(lowerleft) self.mediaBox.setUpperRight(upperright) - self[NameObject("/Contents")] = ContentStream(new_content_array, self.pdf) + self[NameObject(PG.CONTENTS)] = ContentStream(new_content_array, self.pdf) self[NameObject(PG.RESOURCES)] = new_resources self[NameObject(PG.ANNOTS)] = new_annots @@ -593,7 +593,7 @@ def addTransformation(self, ctm): original_content, self.pdf, ctm ) new_content = PageObject._pushPopGS(new_content, self.pdf) - self[NameObject("/Contents")] = new_content + self[NameObject(PG.CONTENTS)] = new_content def scale(self, sx, sy): """ @@ -612,8 +612,8 @@ def scale(self, sx, sy): float(self.mediaBox.getUpperRight_y()) * sy, ] ) - if "/VP" in self: - viewport = self["/VP"] + if PG.VP in self: + viewport = self[PG.VP] if isinstance(viewport, ArrayObject): bbox = viewport[0]["/BBox"] else: @@ -627,11 +627,11 @@ def scale(self, sx, sy): ] ) if isinstance(viewport, ArrayObject): - self[NameObject("/VP")][NumberObject(0)][ + self[NameObject(PG.VP)][NumberObject(0)][ NameObject("/BBox") ] = scaled_bbox else: - self[NameObject("/VP")][NameObject("/BBox")] = scaled_bbox + self[NameObject(PG.VP)][NameObject("/BBox")] = scaled_bbox def scaleBy(self, factor): """ @@ -670,7 +670,7 @@ def compressContentStreams(self): if content is not None: if not isinstance(content, ContentStream): content = ContentStream(content, self.pdf) - self[NameObject("/Contents")] = content.flateEncode() + self[NameObject(PG.CONTENTS)] = content.flateEncode() def extractText(self, Tj_sep="", TJ_sep=""): """ @@ -684,7 +684,7 @@ def extractText(self, Tj_sep="", TJ_sep=""): :return: a unicode string object. """ text = u_("") - content = self["/Contents"].getObject() + content = self[PG.CONTENTS].getObject() if not isinstance(content, ContentStream): content = ContentStream(content, self.pdf) # Note: we check all strings are TextStringObjects. ByteStringObjects diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py index a78928bb7..4aff87c3e 100644 --- a/PyPDF2/_reader.py +++ b/PyPDF2/_reader.py @@ -40,6 +40,8 @@ from PyPDF2._security import _alg33_1, _alg34, _alg35 from PyPDF2.constants import CatalogAttributes as CA from PyPDF2.constants import Core as CO +from PyPDF2.constants import DocumentInformationAttributes as DI +from PyPDF2.constants import EncryptionDictAttributes as ED from PyPDF2.constants import PageAttributes as PG from PyPDF2.constants import PagesAttributes as PA from PyPDF2.constants import StreamAttributes as SA @@ -118,39 +120,39 @@ def title(self): Returns a unicode string (``TextStringObject``) or ``None`` if the title is not specified.""" return ( - self.getText("/Title") or self.get("/Title").getObject() - if self.get("/Title") + self.getText(DI.TITLE) or self.get(DI.TITLE).getObject() + if self.get(DI.TITLE) else None ) @property def title_raw(self): """The "raw" version of title; can return a ``ByteStringObject``.""" - return self.get("/Title") + return self.get(DI.TITLE) @property def author(self): """Read-only property accessing the document's **author**. Returns a unicode string (``TextStringObject``) or ``None`` if the author is not specified.""" - return self.getText("/Author") + return self.getText(DI.AUTHOR) @property def author_raw(self): """The "raw" version of author; can return a ``ByteStringObject``.""" - return self.get("/Author") + return self.get(DI.AUTHOR) @property def subject(self): """Read-only property accessing the document's **subject**. Returns a unicode string (``TextStringObject``) or ``None`` if the subject is not specified.""" - return self.getText("/Subject") + return self.getText(DI.SUBJECT) @property def subject_raw(self): """The "raw" version of subject; can return a ``ByteStringObject``.""" - return self.get("/Subject") + return self.get(DI.SUBJECT) @property def creator(self): @@ -159,12 +161,12 @@ def creator(self): application (e.g. OpenOffice) that created the original document from which it was converted. Returns a unicode string (``TextStringObject``) or ``None`` if the creator is not specified.""" - return self.getText("/Creator") + return self.getText(DI.CREATOR) @property def creator_raw(self): """The "raw" version of creator; can return a ``ByteStringObject``.""" - return self.get("/Creator") + return self.get(DI.CREATOR) @property def producer(self): @@ -173,12 +175,12 @@ def producer(self): the name of the application (for example, OSX Quartz) that converted it to PDF. Returns a unicode string (``TextStringObject``) or ``None`` if the producer is not specified.""" - return self.getText("/Producer") + return self.getText(DI.PRODUCER) @property def producer_raw(self): """The "raw" version of producer; can return a ``ByteStringObject``.""" - return self.get("/Producer") + return self.get(DI.PRODUCER) class PdfFileReader(object): @@ -1357,9 +1359,9 @@ def _decrypt(self, password): def _authenticateUserPassword(self, password): encrypt = self.trailer[TK.ENCRYPT].getObject() - rev = encrypt["/R"].getObject() - owner_entry = encrypt["/O"].getObject() - p_entry = encrypt["/P"].getObject() + rev = encrypt[ED.R].getObject() + owner_entry = encrypt[ED.O].getObject() + p_entry = encrypt[ED.P].getObject() if TK.ID in self.trailer: id_entry = self.trailer[TK.ID].getObject() else: @@ -1368,7 +1370,7 @@ def _authenticateUserPassword(self, password): # https://github.com/mstamy2/PyPDF2/issues/608 id_entry = ArrayObject([ByteStringObject(b""), ByteStringObject(b"")]) id1_entry = id_entry[0].getObject() - real_U = encrypt["/U"].getObject().original_bytes + real_U = encrypt[ED.U].getObject().original_bytes if rev == 2: U, key = _alg34(password, owner_entry, p_entry, id1_entry) elif rev >= 3: @@ -1379,7 +1381,7 @@ def _authenticateUserPassword(self, password): owner_entry, p_entry, id1_entry, - encrypt.get("/EncryptMetadata", BooleanObject(False)).getObject(), + encrypt.get(ED.ENCRYPT_METADATA, BooleanObject(False)).getObject(), ) U, real_U = U[:16], real_U[:16] return U == real_U, key diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 5df96df89..67c6cba7e 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -40,6 +40,7 @@ from PyPDF2._security import _alg33, _alg34, _alg35 from PyPDF2.constants import CatalogAttributes as CA from PyPDF2.constants import Core as CO +from PyPDF2.constants import EncryptionDictAttributes as ED from PyPDF2.constants import PageAttributes as PG from PyPDF2.constants import PagesAttributes as PA from PyPDF2.constants import StreamAttributes as SA @@ -396,7 +397,7 @@ def updatePageFormFieldValues(self, page, fields, flags=0): PDF Reference Table 8.70 for details. """ # Iterate through pages, update field values - for j in range(0, len(page[PG.ANNOTS])): + for j in range(len(page[PG.ANNOTS])): writer_annot = page[PG.ANNOTS][j].getObject() # retrieve parent field values, if present writer_parent_annot = {} # fallback if it's not there @@ -487,10 +488,10 @@ def encrypt(self, user_pwd, owner_pwd=None, use_128bit=True, permissions_flag=-1 encrypt[NameObject("/V")] = NumberObject(V) if V == 2: encrypt[NameObject(SA.LENGTH)] = NumberObject(keylen * 8) - encrypt[NameObject("/R")] = NumberObject(rev) - encrypt[NameObject("/O")] = ByteStringObject(O) - encrypt[NameObject("/U")] = ByteStringObject(U) - encrypt[NameObject("/P")] = NumberObject(P) + encrypt[NameObject(ED.R)] = NumberObject(rev) + encrypt[NameObject(ED.O)] = ByteStringObject(O) + encrypt[NameObject(ED.U)] = ByteStringObject(U) + encrypt[NameObject(ED.P)] = NumberObject(P) self._encrypt = self._addObject(encrypt) self._encrypt_key = key diff --git a/PyPDF2/constants.py b/PyPDF2/constants.py index 85c563375..de40148b2 100644 --- a/PyPDF2/constants.py +++ b/PyPDF2/constants.py @@ -31,6 +31,33 @@ class CatalogAttributes: DESTS = "/Dests" +class EncryptionDictAttributes: + """ + Additional encryption dictionary entries for the standard security handler + + TABLE 3.19, Page 122 + """ + + R = "/R" # number, required; revision of the standard security handler + O = "/O" # 32-byte string, required + U = "/U" # 32-byte string, required + P = "/P" # integer flag, required; permitted operations + ENCRYPT_METADATA = "/EncryptMetadata" # boolean flag, optional + + +class Ressources: + """TABLE 3.30 Entries in a resource dictionary""" + + EXT_G_STATE = "/ExtGState" # dictionary, optional + COLOR_SPACE = "/ColorSpace" # dictionary, optional + PATTERN = "/Pattern" # dictionary, optional + SHADING = "/Shading" # dictionary, optional + XOBJECT = "/XObject" # dictionary, optional + FONT = "/Font" # dictionary, optional + PROC_SET = "/ProcSet" # array, optional + PROPERTIES = "/Properties" # dictionary, optional + + class PagesAttributes: """Page Attributes, Table 6.2, Page 52""" @@ -41,26 +68,39 @@ class PagesAttributes: class PageAttributes: - """Page attributes, Table 6.3, Page 53""" + """TABLE 3.27 Entries in a page object""" TYPE = "/Type" # name, required; must be /Page - MEDIABOX = "/MediaBox" # array, required; rectangle specifying page size PARENT = "/Parent" # dictionary, required; a pages object + LAST_MODIFIED = ( + "/LastModified" # date, optional; date and time of last modification + ) RESOURCES = "/Resources" # dictionary, required if there are any + MEDIABOX = "/MediaBox" # rectangle, required; rectangle specifying page size + CROPBOX = "/CropBox" # rectangle, optional; rectangle + BLEEDBOX = "/BleedBox" # rectangle, optional; rectangle + TRIMBOX = "/TrimBox" # rectangle, optional; rectangle + ARTBOX = "/ArtBox" # rectangle, optional; rectangle + BOX_COLOR_INFO = "/BoxColorInfo" # dictionary, optional CONTENTS = "/Contents" # stream or array, optional - CROPBOX = "/CropBox" # array, optional; rectangle ROTATE = "/Rotate" # integer, optional; page rotation in degrees + GROUP = "/Group" # dictionary, optional; page group THUMB = "/Thumb" # stream, optional; indirect reference to image of the page + B = "/B" # array, optional + DUR = "/Dur" # number, optional + TRANS = "/Trans" # dictionary, optional ANNOTS = "/Annots" # array, optional; an array of annotations - - -class Ressources: - PROCSET = "/ProcSet" # Chapter 6.8.1 - FONT = "/Font" # Chapter 6.8.2 - # encoding - # font descriptors : 6.8.4 - COLOR_SPACE = "/ColorSpace" # Chapter 6.8.5 - XOBJECT = "/XObject" # Chapter 6.8.6 + AA = "/AA" # dictionary, optional + METADATA = "/Metadata" # stream, optional + PIECE_INFO = "/PieceInfo" # dictionary, optional + STRUCT_PARENTS = "/StructParents" # integer, optional + ID = "/ID" # byte string, optional + PZ = "/PZ" # number, optional + TABS = "/Tabs" # name, optional + TEMPLATE_INSTANTIATED = "/TemplateInstantiated" # name, optional + PRES_STEPS = "/PresSteps" # dictionary, optional + USER_UNIT = "/UserUnit" # number, optional + VP = "/VP" # dictionary, optional class StreamAttributes: @@ -164,6 +204,20 @@ class TypFitArguments: FIT_R = "/FitR" +class DocumentInformationAttributes: + """TABLE 10.2 Entries in the document information dictionary""" + + TITLE = "/Title" # text string, optional + AUTHOR = "/Author" # text string, optional + SUBJECT = "/Subject" # text string, optional + KEYWORDS = "/Keywords" # text string, optional + CREATOR = "/Creator" # text string, optional + PRODUCER = "/Producer" # text string, optional + CREATION_DATE = "/CreationDate" # date, optional + MOD_DATE = "/ModDate" # date, optional + TRAPPED = "/Trapped" # name, optional + + class PageLayouts: """Page 84, PDF 1.4 reference""" @@ -187,13 +241,40 @@ class CatalogDictionary: """Table 3.25 in the 1.7 reference""" TYPE = "/Type" # name, required; must be /Catalog - # TODO: Many more! + VERSION = "/Version" # name + PAGES = "/Pages" # dictionary, required + PAGE_LABELS = "/PageLabels" # number tree, optional + NAMES = "/Names" # dictionary, optional + DESTS = "/Dests" # dictionary, optional + VIEWER_PREFERENCES = "/ViewerPreferences" # dictionary, optional + PAGE_LAYOUT = "/PageLayout" # name, optional + PAGE_MODE = "/PageMode" # name, optional + OUTLINES = "/Outlines" # dictionary, optional + THREADS = "/Threads" # array, optional + OPEN_ACTION = "/OpenAction" # array or dictionary or name, optional + AA = "/AA" # dictionary, optional + URI = "/URI" # dictionary, optional + ACRO_FORM = "/AcroForm" # dictionary, optional + METADATA = "/Metadata" # stream, optional + STRUCT_TREE_ROOT = "/StructTreeRoot" # dictionary, optional + MARK_INFO = "/MarkInfo" # dictionary, optional + LANG = "/Lang" # text string, optional + SPIDER_INFO = "/SpiderInfo" # dictionary, optional + OUTPUT_INTENTS = "/OutputIntents" # array, optional + PIECE_INFO = "/PieceInfo" # dictionary, optional + OC_PROPERTIES = "/OCProperties" # dictionary, optional + PERMS = "/Perms" # dictionary, optional + LEGAL = "/Legal" # dictionary, optional + REQUIREMENTS = "/Requirements" # array, optional + COLLECTION = "/Collection" # dictionary, optional + NEEDS_RENDERING = "/NeedsRendering" # boolean, optional PDF_KEYS = [ PagesAttributes, PageAttributes, Ressources, + EncryptionDictAttributes, ImageAttributes, StreamAttributes, FilterTypes, diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index e4de8243d..1d1d37b21 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -226,21 +226,21 @@ def decode(data, decodeParms=None): """ retval = "" hex_pair = "" - x = 0 + index = 0 while True: - if x >= len(data): + if index >= len(data): raise PdfStreamError("Unexpected EOD in ASCIIHexDecode") - c = data[x] - if c == ">": + char = data[index] + if char == ">": break - elif c.isspace(): - x += 1 + elif char.isspace(): + index += 1 continue - hex_pair += c + hex_pair += char if len(hex_pair) == 2: retval += chr(int(hex_pair, base=16)) hex_pair = "" - x += 1 + index += 1 assert hex_pair == "" return retval @@ -353,19 +353,19 @@ def decode(data, decodeParms=None): # remove all whitespace from data data = [y for y in data if y not in " \n\r\t"] while not hit_eod: - c = data[index] - if len(retval) == 0 and c == "<" and data[index + 1] == "~": + char = data[index] + if len(retval) == 0 and char == "<" and data[index + 1] == "~": index += 2 continue # elif c.isspace(): # index += 1 # continue - elif c == "z": + elif char == "z": assert len(group) == 0 retval += "\x00\x00\x00\x00" index += 1 continue - elif c == "~" and data[index + 1] == ">": + elif char == "~" and data[index + 1] == ">": if len(group) != 0: # cannot have a final group of just 1 char assert len(group) > 1 @@ -375,9 +375,9 @@ def decode(data, decodeParms=None): else: break else: - c = ord(c) - 33 - assert c >= 0 and c < 85 - group += [c] + char = ord(char) - 33 + assert char >= 0 and char < 85 + group += [char] if len(group) >= 5: b = ( group[0] * (85**4) @@ -407,17 +407,17 @@ def decode(data, decodeParms=None): data = data.encode("ascii") group_index = b = 0 out = bytearray() - for c in data: - if ord("!") <= c and c <= ord("u"): + for char in data: + if ord("!") <= char and char <= ord("u"): group_index += 1 - b = b * 85 + (c - 33) + b = b * 85 + (char - 33) if group_index == 5: out += struct.pack(b">L", b) group_index = b = 0 - elif c == ord("z"): + elif char == ord("z"): assert group_index == 0 out += b"\0\0\0\0" - elif c == ord("~"): + elif char == ord("~"): if group_index: for _ in range(5 - group_index): b = b * 85 + 84 diff --git a/PyPDF2/merger.py b/PyPDF2/merger.py index b8ecccee8..cf848f817 100644 --- a/PyPDF2/merger.py +++ b/PyPDF2/merger.py @@ -372,21 +372,16 @@ def _trim_outline(self, pdf, outline, pages): return new_outline def _write_dests(self): - dests = self.named_dests - - for v in dests: + for named_dest in self.named_dests: pageno = None - pdf = None - if "/Page" in v: - for i, p in enumerate(self.pages): - if p.id == v["/Page"]: - v[NameObject("/Page")] = p.out_pagedata - pageno = i - pdf = p.src # noqa: F841 + if "/Page" in named_dest: + for pageno, page in enumerate(self.pages): + if page.id == named_dest["/Page"]: + named_dest[NameObject("/Page")] = page.out_pagedata break if pageno is not None: - self.output.addNamedDestinationObject(v) + self.output.addNamedDestinationObject(named_dest) def _write_bookmarks(self, bookmarks=None, parent=None): if bookmarks is None: diff --git a/PyPDF2/xmp.py b/PyPDF2/xmp.py index 29f73a7e8..86ceeb2d3 100644 --- a/PyPDF2/xmp.py +++ b/PyPDF2/xmp.py @@ -101,20 +101,20 @@ def _converter_string(value): @staticmethod def _converter_date(value): - m = iso8601.match(value) - year = int(m.group("year")) - month = int(m.group("month") or "1") - day = int(m.group("day") or "1") - hour = int(m.group("hour") or "0") - minute = int(m.group("minute") or "0") - second = decimal.Decimal(m.group("second") or "0") + matches = iso8601.match(value) + year = int(matches.group("year")) + month = int(matches.group("month") or "1") + day = int(matches.group("day") or "1") + hour = int(matches.group("hour") or "0") + minute = int(matches.group("minute") or "0") + second = decimal.Decimal(matches.group("second") or "0") seconds = second.to_integral(decimal.ROUND_FLOOR) milliseconds = (second - seconds) * 1000000 seconds = int(seconds) milliseconds = int(milliseconds) - tzd = m.group("tzd") or "Z" + tzd = matches.group("tzd") or "Z" dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds) if tzd != "Z": tzd_hours, tzd_minutes = [int(x) for x in tzd.split(":")] diff --git a/Tests/test_reader.py b/Tests/test_reader.py index 6b4782680..2175274b1 100644 --- a/Tests/test_reader.py +++ b/Tests/test_reader.py @@ -104,7 +104,7 @@ def test_get_annotations(src): for annot in page[PG.ANNOTS]: subtype = annot.getObject()[IA.SUBTYPE] if subtype == "/Text": - annot.getObject()["/Contents"] + annot.getObject()[PG.CONTENTS] @pytest.mark.parametrize( From 2cfa3a02ed02d92059c813d4f48bc366c60d90b3 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sat, 30 Apr 2022 17:57:55 +0200 Subject: [PATCH 4/4] Ignore B007 --- PyPDF2/merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/merger.py b/PyPDF2/merger.py index cf848f817..b3693a6af 100644 --- a/PyPDF2/merger.py +++ b/PyPDF2/merger.py @@ -375,7 +375,7 @@ def _write_dests(self): for named_dest in self.named_dests: pageno = None if "/Page" in named_dest: - for pageno, page in enumerate(self.pages): + for pageno, page in enumerate(self.pages): # noqa: B007 if page.id == named_dest["/Page"]: named_dest[NameObject("/Page")] = page.out_pagedata break