Skip to content

Commit

Permalink
MAINT: Use grouped constants instead of literals
Browse files Browse the repository at this point in the history
This allows us to document what they are good for and to distinguish
literals of the same name but of different contexts,
e.g. /Type in Pages or /Type in Page
  • Loading branch information
MartinThoma committed Apr 14, 2022
1 parent e45e66b commit c1107cf
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 36 deletions.
7 changes: 4 additions & 3 deletions PyPDF2/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from .utils import isString, str_
from .pdf import PdfFileReader, PdfFileWriter
from .pagerange import PageRange
from PyPDF2.pdf_attributes import PagesAttributes as PA
from sys import version_info
if version_info < ( 3, 0 ):
from cStringIO import StringIO
Expand Down Expand Up @@ -218,8 +219,8 @@ def write(self, fileobj):
# The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13
for page in self.pages:
self.output.addPage(page.pagedata)
page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())
# idnum = self.output._objects.index(self.output._pages.getObject()["/Kids"][-1].getObject()) + 1
page.out_pagedata = self.output.getReference(self.output._pages.getObject()[PA.KIDS][-1].getObject())
# idnum = self.output._objects.index(self.output._pages.getObject()[PA.KIDS][-1].getObject()) + 1
# page.out_pagedata = IndirectObject(idnum, 0, self.output)

# Once all pages are added, create bookmarks to point at those pages
Expand Down Expand Up @@ -546,7 +547,7 @@ def remove(self, index):
self.tree.removeChild(obj)

def add(self, title, pagenum):
pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum]
pageRef = self.pdf.getObject(self.pdf._pages)[PA.KIDS][pagenum]
action = DictionaryObject()
action.update({
NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
Expand Down
67 changes: 34 additions & 33 deletions PyPDF2/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from .generic import *
from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
from .utils import isString, b_, u_, ord_, str_, formatWarning
from PyPDF2.pdf_attributes import PagesAttributes as PA, PageAttributes as PG

if version_info < ( 2, 4 ):
from sets import ImmutableSet as frozenset
Expand All @@ -86,9 +87,9 @@ def __init__(self):
# The root of our page tree node.
pages = DictionaryObject()
pages.update({
NameObject("/Type"): NameObject("/Pages"),
NameObject("/Count"): NumberObject(0),
NameObject("/Kids"): ArrayObject(),
NameObject(PA.TYPE): NameObject("/Pages"),
NameObject(PA.COUNT): NumberObject(0),
NameObject(PA.KIDS): ArrayObject(),
})
self._pages = self._addObject(pages)

Expand All @@ -102,7 +103,7 @@ def __init__(self):
# root object
root = DictionaryObject()
root.update({
NameObject("/Type"): NameObject("/Catalog"),
NameObject(PA.TYPE): NameObject("/Catalog"),
NameObject("/Pages"): self._pages,
})
self._root = None
Expand All @@ -118,12 +119,12 @@ def getObject(self, ido):
return self._objects[ido.idnum - 1]

def _addPage(self, page, action):
assert page["/Type"] == "/Page"
assert page[PA.TYPE] == "/Page"
page[NameObject("/Parent")] = self._pages
page = self._addObject(page)
pages = self.getObject(self._pages)
action(pages["/Kids"], page)
pages[NameObject("/Count")] = NumberObject(pages["/Count"] + 1)
action(pages[PA.KIDS], page)
pages[NameObject(PA.COUNT)] = NumberObject(pages[PA.COUNT] + 1)

def addPage(self, page):
"""
Expand Down Expand Up @@ -157,7 +158,7 @@ def getPage(self, pageNumber):
"""
pages = self.getObject(self._pages)
# XXX: crude hack
return pages["/Kids"][pageNumber].getObject()
return pages[PA.KIDS][pageNumber].getObject()

def getNumPages(self):
"""
Expand Down Expand Up @@ -220,7 +221,7 @@ def addJS(self, javascript):
"""
js = DictionaryObject()
js.update({
NameObject("/Type"): NameObject("/Action"),
NameObject(PA.TYPE): NameObject("/Action"),
NameObject("/S"): NameObject("/JavaScript"),
NameObject("/JS"): NameObject("(%s)" % javascript)
})
Expand Down Expand Up @@ -275,7 +276,7 @@ def addAttachment(self, fname, fdata):
file_entry = DecodedStreamObject()
file_entry.setData(fdata)
file_entry.update({
NameObject("/Type"): NameObject("/EmbeddedFile")
NameObject(PA.TYPE): NameObject("/EmbeddedFile")
})

# The Filespec entry
Expand All @@ -292,7 +293,7 @@ def addAttachment(self, fname, fdata):

filespec = DictionaryObject()
filespec.update({
NameObject("/Type"): NameObject("/Filespec"),
NameObject(PA.TYPE): NameObject("/Filespec"),
NameObject("/F"): createStringObject(fname), # Perhaps also try TextStringObject
NameObject("/EF"): efEntry
})
Expand Down Expand Up @@ -707,7 +708,7 @@ def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, itali
:param str fit: The fit of the destination page. See
:meth:`addLink()<addLink>` for details.
"""
pageRef = self.getObject(self._pages)['/Kids'][pagenum]
pageRef = self.getObject(self._pages)[PA.KIDS][pagenum]
action = DictionaryObject()
zoomArgs = []
for a in args:
Expand Down Expand Up @@ -762,7 +763,7 @@ def addNamedDestinationObject(self, dest):
return destRef

def addNamedDestination(self, title, pagenum):
pageRef = self.getObject(self._pages)['/Kids'][pagenum]
pageRef = self.getObject(self._pages)[PA.KIDS][pagenum]
dest = DictionaryObject()
dest.update({
NameObject('/D') : ArrayObject([pageRef, NameObject('/FitH'), NumberObject(826)]),
Expand All @@ -780,7 +781,7 @@ def removeLinks(self):
"""
Removes links and annotations from this output.
"""
pages = self.getObject(self._pages)['/Kids']
pages = self.getObject(self._pages)[PA.KIDS]
for page in pages:
pageRef = self.getObject(page)
if "/Annots" in pageRef:
Expand All @@ -793,7 +794,7 @@ def removeImages(self, ignoreByteStringObject=False):
:param bool ignoreByteStringObject: optional parameter
to ignore ByteString Objects.
"""
pages = self.getObject(self._pages)['/Kids']
pages = self.getObject(self._pages)[PA.KIDS]
for j in range(len(pages)):
page = pages[j]
pageRef = self.getObject(page)
Expand Down Expand Up @@ -848,7 +849,7 @@ def removeText(self, ignoreByteStringObject=False):
:param bool ignoreByteStringObject: optional parameter
to ignore ByteString Objects.
"""
pages = self.getObject(self._pages)['/Kids']
pages = self.getObject(self._pages)[PA.KIDS]
for j in range(len(pages)):
page = pages[j]
pageRef = self.getObject(page)
Expand Down Expand Up @@ -913,7 +914,7 @@ def addURI(self, pagenum, uri, rect, border=None):
-John Mulligan
"""

pageLink = self.getObject(self._pages)['/Kids'][pagenum]
pageLink = self.getObject(self._pages)[PA.KIDS][pagenum]
pageRef = self.getObject(pageLink)

if border is not None:
Expand Down Expand Up @@ -989,8 +990,8 @@ def addLink(self, pagenum, pagedest, rect, border=None, fit='/Fit', *args):
- [left]
"""

pageLink = self.getObject(self._pages)['/Kids'][pagenum]
pageDest = self.getObject(self._pages)['/Kids'][pagedest] # TODO: switch for external link
pageLink = self.getObject(self._pages)[PA.KIDS][pagenum]
pageDest = self.getObject(self._pages)[PA.KIDS][pagedest] # TODO: switch for external link
pageRef = self.getObject(pageLink)

if border is not None:
Expand Down Expand Up @@ -1338,9 +1339,9 @@ def _buildField(self, field, retval, fileobj, fieldAttributes):
retval[key] = Field(field)

def _checkKids(self, tree, retval, fileobj):
if "/Kids" in tree:
if PA.KIDS in tree:
# recurse down the tree
for kid in tree["/Kids"]:
for kid in tree[PA.KIDS]:
self.getFields(kid.getObject(), retval, fileobj)

def _writeField(self, fileobj, field, fieldAttributes):
Expand Down Expand Up @@ -1400,9 +1401,9 @@ def getNamedDestinations(self, tree=None, retval=None):
if tree is None:
return retval

if "/Kids" in tree:
if PA.KIDS in tree:
# recurse down the tree
for kid in tree["/Kids"]:
for kid in tree[PA.KIDS]:
self.getNamedDestinations(kid.getObject(), retval)

if "/Names" in tree:
Expand Down Expand Up @@ -1589,7 +1590,7 @@ def getPageMode(self):

def _flatten(self, pages=None, inherit=None, indirectRef=None):
inheritablePageAttributes = (
NameObject("/Resources"), NameObject("/MediaBox"),
NameObject("/Resources"), NameObject(PG.MEDIABOX),
NameObject("/CropBox"), NameObject("/Rotate")
)
if inherit is None:
Expand All @@ -1600,14 +1601,14 @@ def _flatten(self, pages=None, inherit=None, indirectRef=None):
pages = catalog["/Pages"].getObject()

t = "/Pages"
if "/Type" in pages:
t = pages["/Type"]
if PA.TYPE in pages:
t = pages[PA.TYPE]

if t == "/Pages":
for attr in inheritablePageAttributes:
if attr in pages:
inherit[attr] = pages[attr]
for page in pages["/Kids"]:
for page in pages[PA.KIDS]:
addt = {}
if isinstance(page, IndirectObject):
addt["indirectRef"] = page
Expand Down Expand Up @@ -2237,7 +2238,7 @@ def createBlankPage(pdf=None, width=None, height=None):
height = lastpage.mediaBox.getHeight()
else:
raise utils.PageSizeNotDefinedError()
page.__setitem__(NameObject('/MediaBox'),
page.__setitem__(NameObject(PG.MEDIABOX),
RectangleObject([0, 0, width, height]))

return page
Expand Down Expand Up @@ -2736,14 +2737,14 @@ def extractText(self, Tj_sep="", TJ_sep=" "):
text += "\n"
return text

mediaBox = createRectangleAccessor("/MediaBox", ())
mediaBox = createRectangleAccessor(PG.MEDIABOX, ())
"""
A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
defining the boundaries of the physical medium on which the page is
intended to be displayed or printed.
"""

cropBox = createRectangleAccessor("/CropBox", ("/MediaBox",))
cropBox = createRectangleAccessor("/CropBox", (PG.MEDIABOX,))
"""
A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
defining the visible region of default user space. When the page is
Expand All @@ -2752,20 +2753,20 @@ def extractText(self, Tj_sep="", TJ_sep=" "):
implementation-defined manner. Default value: same as :attr:`mediaBox<mediaBox>`.
"""

bleedBox = createRectangleAccessor("/BleedBox", ("/CropBox", "/MediaBox"))
bleedBox = createRectangleAccessor("/BleedBox", ("/CropBox", PG.MEDIABOX))
"""
A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
defining the region to which the contents of the page should be clipped
when output in a production environment.
"""

trimBox = createRectangleAccessor("/TrimBox", ("/CropBox", "/MediaBox"))
trimBox = createRectangleAccessor("/TrimBox", ("/CropBox", PG.MEDIABOX))
"""
A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
defining the intended dimensions of the finished page after trimming.
"""

artBox = createRectangleAccessor("/ArtBox", ("/CropBox", "/MediaBox"))
artBox = createRectangleAccessor("/ArtBox", ("/CropBox", PG.MEDIABOX))
"""
A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
defining the extent of the page's meaningful content as intended by the
Expand Down
18 changes: 18 additions & 0 deletions PyPDF2/pdf_attributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""
See Portable Document Format Reference Manual, 1993. ISBN 0-201-62628-4.
See https://ia802202.us.archive.org/8/items/pdfy-0vt8s-egqFwDl7L2/PDF%20Reference%201.0.pdf
"""

class PagesAttributes:
    """Pages attributes, Table 6.2, Page 52.

    Dictionary keys of a ``/Pages`` (page tree) node. They are grouped
    separately from :class:`PageAttributes` so that keys sharing a name
    (e.g. ``/Type``) can be told apart by the context they are used in.
    The values are plain strings and can be used directly as dictionary
    keys or wrapped in a ``NameObject``.
    """

    # name, required; must be /Pages
    TYPE = "/Type"
    # array, required; list of indirect references
    KIDS = "/Kids"
    # integer, required; the number of all nodes under this node
    COUNT = "/Count"
    # dictionary, required; indirect reference to pages object
    PARENT = "/Parent"

class PageAttributes:
    """Page attributes, Table 6.3, Page 53.

    Dictionary keys of a single ``/Page`` object. The values are plain
    strings and can be used directly as dictionary keys or wrapped in a
    ``NameObject``.
    """

    # name, required; must be /Page
    TYPE = "/Type"
    # array, required; rectangle specifying page size
    MEDIABOX = "/MediaBox"
    # reference to the pages node the page belongs to
    PARENT = "/Parent"
    # The keys below are, like /MediaBox, treated as inheritable from the
    # pages node when the page tree is flattened.
    RESOURCES = "/Resources"
    # rectangle defining the visible region; defaults to the media box
    CROPBOX = "/CropBox"
    ROTATE = "/Rotate"
...

0 comments on commit c1107cf

Please sign in to comment.