def _write_bookmark_on_page(
    self, bookmark: Union[Bookmark, Destination], page: _MergedPage
) -> None:
    """
    Attach a ``/GoTo`` action targeting ``page`` to ``bookmark``.

    The destination array is built as ``[page.id, fit_type, *coordinates]``.
    The coordinates are taken from the bookmark's own entries, in the order
    listed for its fit type in the table below. An entry that is missing or
    is a ``NullObject`` is written as ``0``. Coordinate entries that were
    consumed are removed from the bookmark, and the finished action is stored
    under ``/A``.

    :param bookmark: bookmark or destination to update in place; its
        ``/Type`` entry holds the fit type.
    :param page: merged page the bookmark points at; only ``page.id`` is read.
    """
    bm_type = cast(str, bookmark["/Type"])
    args = [NumberObject(page.id), NameObject(bm_type)]

    # Coordinate entries consumed per fit type, in destination-array order.
    # A fit type that is not listed here contributes no coordinates.
    fit2arg_keys: Dict[str, Tuple[str, ...]] = {
        TypFitArguments.FIT_H: (TypArguments.TOP,),
        TypFitArguments.FIT_BH: (TypArguments.TOP,),
        TypFitArguments.FIT_V: (TypArguments.LEFT,),
        TypFitArguments.FIT_BV: (TypArguments.LEFT,),
        TypFitArguments.XYZ: (TypArguments.LEFT, TypArguments.TOP, "/Zoom"),
        TypFitArguments.FIT_R: (
            TypArguments.LEFT,
            TypArguments.BOTTOM,
            TypArguments.RIGHT,
            TypArguments.TOP,
        ),
    }
    for arg_key in fit2arg_keys.get(bm_type, ()):
        if arg_key in bookmark and not isinstance(bookmark[arg_key], NullObject):
            args.append(FloatObject(bookmark[arg_key]))
        else:
            args.append(FloatObject(0))
        # The branch above tolerates a missing entry, so the clean-up must
        # tolerate it too; an unconditional ``del`` would raise KeyError.
        if arg_key in bookmark:
            del bookmark[arg_key]

    bookmark[NameObject("/A")] = DictionaryObject(
        {
            NameObject(GoToActionArguments.S): NameObject("/GoTo"),
            NameObject(GoToActionArguments.D): ArrayObject(args),
        }
    )
""" - field_attributes = { - "/FT": "Field Type", - PA.PARENT: "Parent", - "/T": "Field Name", - "/TU": "Alternate Field Name", - "/TM": "Mapping Name", - "/Ff": "Field Flags", - "/V": "Value", - "/DV": "Default Value", - } + field_attributes = FieldDictionaryAttributes.attributes_dict() if retval is None: retval = {} catalog = cast(DictionaryObject, self.trailer[TK.ROOT]) # get the AcroForm tree - if "/AcroForm" in catalog: - tree = cast(Optional[TreeObject], catalog["/AcroForm"]) + if CatalogDictionary.ACRO_FORM in catalog: + tree = cast(Optional[TreeObject], catalog[CatalogDictionary.ACRO_FORM]) else: return None if tree is None: @@ -553,11 +546,15 @@ def _check_kids( self.get_fields(kid.get_object(), retval, fileobj) def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None: - order = ("/TM", "/T", "/FT", PA.PARENT, "/TU", "/Ff", "/V", "/DV") - for attr in order: + for attr in FieldDictionaryAttributes.attributes(): + if attr in ( + FieldDictionaryAttributes.Kids, + FieldDictionaryAttributes.AA, + ): + continue attr_name = field_attributes[attr] try: - if attr == "/FT": + if attr == FieldDictionaryAttributes.FT: # Make the field type value more clear types = { "/Btn": "Button", @@ -567,12 +564,12 @@ def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None: } if field[attr] in types: fileobj.write(attr_name + ": " + types[field[attr]] + "\n") - elif attr == PA.PARENT: + elif attr == FieldDictionaryAttributes.Parent: # Let's just write the name of the parent try: - name = field[PA.PARENT]["/TM"] + name = field[attr][FieldDictionaryAttributes.TM] except KeyError: - name = field[PA.PARENT]["/T"] + name = field[attr][FieldDictionaryAttributes.T] fileobj.write(attr_name + ": " + name + "\n") else: fileobj.write(attr_name + ": " + str(field[attr]) + "\n") @@ -819,9 +816,9 @@ def _build_outline(self, node: DictionaryObject) -> Optional[Destination]: # Action, section 8.5 (only type GoTo supported) title = node["/Title"] 
action = cast(DictionaryObject, node["/A"]) - action_type = cast(NameObject, action["/S"]) + action_type = cast(NameObject, action[GoToActionArguments.S]) if action_type == "/GoTo": - dest = action["/D"] + dest = action[GoToActionArguments.D] elif "/Dest" in node and "/Title" in node: # Destination, section 8.2.1 title = node["/Title"] diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 17c56dcb6..7529f504e 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -47,13 +47,22 @@ b_, deprecate_with_replacement, ) +from .constants import AnnotationDictionaryAttributes from .constants import CatalogAttributes as CA +from .constants import CatalogDictionary from .constants import Core as CO from .constants import EncryptionDictAttributes as ED +from .constants import ( + FieldDictionaryAttributes, + FileSpecificationDictionaryEntries, + GoToActionArguments, + InteractiveFormDictEntries, +) from .constants import PageAttributes as PG from .constants import PagesAttributes as PA from .constants import StreamAttributes as SA from .constants import TrailerKeys as TK +from .constants import TypFitArguments from .generic import ( ArrayObject, BooleanObject, @@ -187,17 +196,17 @@ def set_need_appearances_writer(self) -> None: try: catalog = self._root_object # get the AcroForm tree - if "/AcroForm" not in catalog: + if CatalogDictionary.ACRO_FORM not in catalog: self._root_object.update( { - NameObject("/AcroForm"): IndirectObject( + NameObject(CatalogDictionary.ACRO_FORM): IndirectObject( len(self._objects), 0, self ) } ) - need_appearances = NameObject("/NeedAppearances") - self._root_object["/AcroForm"][need_appearances] = BooleanObject(True) # type: ignore + need_appearances = NameObject(InteractiveFormDictEntries.NeedAppearances) + self._root_object[CatalogDictionary.ACRO_FORM][need_appearances] = BooleanObject(True) # type: ignore except Exception as exc: logger.error("set_need_appearances_writer() catch : ", repr(exc)) @@ -465,10 +474,10 @@ def 
add_attachment(self, filename: str, data: Union[str, bytes]) -> None: filespec.update( { NameObject(PA.TYPE): NameObject("/Filespec"), - NameObject("/F"): create_string_object( + NameObject(FileSpecificationDictionaryEntries.F): create_string_object( filename ), # Perhaps also try TextStringObject - NameObject("/EF"): ef_entry, + NameObject(FileSpecificationDictionaryEntries.EF): ef_entry, } ) @@ -579,15 +588,29 @@ def update_page_form_field_values( if PG.PARENT in writer_annot: writer_parent_annot = writer_annot[PG.PARENT] for field in fields: - if writer_annot.get("/T") == field: + if writer_annot.get(FieldDictionaryAttributes.T) == field: writer_annot.update( - {NameObject("/V"): TextStringObject(fields[field])} + { + NameObject(FieldDictionaryAttributes.V): TextStringObject( + fields[field] + ) + } ) if flags: - writer_annot.update({NameObject("/Ff"): NumberObject(flags)}) - elif writer_parent_annot.get("/T") == field: + writer_annot.update( + { + NameObject(FieldDictionaryAttributes.Ff): NumberObject( + flags + ) + } + ) + elif writer_parent_annot.get(FieldDictionaryAttributes.T) == field: writer_parent_annot.update( - {NameObject("/V"): TextStringObject(fields[field])} + { + NameObject(FieldDictionaryAttributes.V): TextStringObject( + fields[field] + ) + } ) def updatePageFormFieldValues( @@ -1109,7 +1132,10 @@ def add_bookmark( ) dest_array = dest.dest_array action.update( - {NameObject("/D"): dest_array, NameObject("/S"): NameObject("/GoTo")} + { + NameObject(GoToActionArguments.D): dest_array, + NameObject(GoToActionArguments.S): NameObject("/GoTo"), + } ) action_ref = self._add_object(action) @@ -1174,10 +1200,10 @@ def add_named_destination(self, title: str, pagenum: int) -> IndirectObject: dest = DictionaryObject() dest.update( { - NameObject("/D"): ArrayObject( - [page_ref, NameObject("/FitH"), NumberObject(826)] + NameObject(GoToActionArguments.D): ArrayObject( + [page_ref, NameObject(TypFitArguments.FIT_H), NumberObject(826)] ), - NameObject("/S"): 
class GoToActionArguments:
    """Entry names of a go-to action dictionary."""

    # name, required: type of action
    S = "/S"
    # name / byte string / array, required: destination to jump to
    D = "/D"
class InteractiveFormDictEntries:
    """
    Entry names of the interactive form (AcroForm) dictionary.

    NOTE(review): presumably TABLE 8.67 of the PDF 1.7 reference - confirm
    the table number before citing it.
    """

    Fields = "/Fields"
    NeedAppearances = "/NeedAppearances"
    SigFlags = "/SigFlags"
    CO = "/CO"
    DR = "/DR"
    DA = "/DA"
    Q = "/Q"
    XFA = "/XFA"
+ TypArguments, + TypFitArguments, ) diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 22e54c798..a7135fd12 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -58,6 +58,7 @@ skip_over_comment, str_, ) +from .constants import FieldDictionaryAttributes from .constants import FilterTypes as FT from .constants import StreamAttributes as SA from .constants import TypArguments as TA @@ -1583,19 +1584,8 @@ class Field(TreeObject): def __init__(self, data: Dict[str, Any]) -> None: DictionaryObject.__init__(self) - attributes = ( - "/FT", - "/Parent", - "/Kids", - "/T", - "/TU", - "/TM", - "/Ff", - "/V", - "/DV", - "/AA", - ) - for attr in attributes: + + for attr in FieldDictionaryAttributes.attributes(): try: self[NameObject(attr)] = data[attr] except KeyError: @@ -1605,7 +1595,7 @@ def __init__(self, data: Dict[str, Any]) -> None: @property def field_type(self) -> Optional[NameObject]: """Read-only property accessing the type of this field.""" - return self.get("/FT") + return self.get(FieldDictionaryAttributes.FT) @property def fieldType(self) -> Optional[NameObject]: # pragma: no cover @@ -1620,22 +1610,22 @@ def fieldType(self) -> Optional[NameObject]: # pragma: no cover @property def parent(self) -> Optional[DictionaryObject]: """Read-only property accessing the parent of this field.""" - return self.get("/Parent") + return self.get(FieldDictionaryAttributes.Parent) @property def kids(self) -> Optional[ArrayObject]: """Read-only property accessing the kids of this field.""" - return self.get("/Kids") + return self.get(FieldDictionaryAttributes.Kids) @property def name(self) -> Optional[str]: """Read-only property accessing the name of this field.""" - return self.get("/T") + return self.get(FieldDictionaryAttributes.T) @property def alternate_name(self) -> Optional[str]: """Read-only property accessing the alternate name of this field.""" - return self.get("/TU") + return self.get(FieldDictionaryAttributes.TU) @property def altName(self) -> 
Optional[str]: # pragma: no cover @@ -1654,7 +1644,7 @@ def mapping_name(self) -> Optional[str]: name is used by PyPDF2 as a key in the dictionary returned by :meth:`get_fields()` """ - return self.get("/TM") + return self.get(FieldDictionaryAttributes.TM) @property def mappingName(self) -> Optional[str]: # pragma: no cover @@ -1672,7 +1662,7 @@ def flags(self) -> Optional[int]: Read-only property accessing the field flags, specifying various characteristics of the field (see Table 8.70 of the PDF 1.7 reference). """ - return self.get("/Ff") + return self.get(FieldDictionaryAttributes.Ff) @property def value(self) -> Optional[Any]: @@ -1680,12 +1670,12 @@ def value(self) -> Optional[Any]: Read-only property accessing the value of this field. Format varies based on field type. """ - return self.get("/V") + return self.get(FieldDictionaryAttributes.V) @property def default_value(self) -> Optional[Any]: """Read-only property accessing the default value of this field.""" - return self.get("/DV") + return self.get(FieldDictionaryAttributes.DV) @property def defaultValue(self) -> Optional[Any]: # pragma: no cover @@ -1704,7 +1694,7 @@ def additional_actions(self) -> Optional[DictionaryObject]: This dictionary defines the field's behavior in response to trigger events. See Section 8.5.2 of the PDF 1.7 reference. 
def pre_mutation(context):
    """
    Mark mutations that are not worth testing as skipped.

    Sets ``context.skip = True`` when the file name contains ``_codecs`` or
    when the current source line contains ``pragma: no cover``, contains
    ``deprecate``, or (ignoring surrounding whitespace) starts with
    ``logger``. Otherwise ``context.skip`` is left untouched.

    :param context: object exposing ``filename``, ``current_source_line``
        and a writable ``skip`` attribute.
    """
    if "_codecs" in context.filename:
        context.skip = True

    # Strip once; ``startswith`` below relies on leading indentation being gone.
    line = context.current_source_line.strip()
    if (
        "pragma: no cover" in line
        or "deprecate" in line
        or line.startswith("logger")
    ):
        context.skip = True