From 94f1d6971b876b4485800a2862cd7eaa4aca674c Mon Sep 17 00:00:00 2001 From: mtd91429 Date: Thu, 14 Jul 2022 13:50:10 -0500 Subject: [PATCH] ENH: Add color and font_format to PdfReader.outlines[i] (#1104) --- PyPDF2/_reader.py | 93 ++++++++++++++++++-------------------------- PyPDF2/generic.py | 9 ++--- tests/test_reader.py | 14 +++++++ 3 files changed, 56 insertions(+), 60 deletions(-) diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py index 775847a376..7bff21ae89 100644 --- a/PyPDF2/_reader.py +++ b/PyPDF2/_reader.py @@ -78,6 +78,7 @@ DictionaryObject, EncodedStreamObject, Field, + FloatObject, IndirectObject, NameObject, NullObject, @@ -797,74 +798,56 @@ def _build_destination( title: str, array: List[Union[NumberObject, IndirectObject, NullObject, DictionaryObject]], ) -> Destination: - page, typ = None, None - - if isinstance(array, type(None)): - # handle outlines without destination - page = NullObject() - typ = TextStringObject("/Fit") - return Destination(title, page, typ) - else: - page, typ = array[0:2] - array = array[2:] - try: - return Destination(title, page, typ, *array) # type: ignore - except PdfReadError: - warnings.warn(f"Unknown destination: {title} {array}", PdfReadWarning) - if self.strict: - raise - else: - # create a link to first Page - tmp = self.pages[0].indirect_ref - indirect_ref = NullObject() if tmp is None else tmp - return Destination( - title, indirect_ref, TextStringObject("/Fit") # type: ignore - ) + page, typ = array[0:2] + array = array[2:] + try: + return Destination(title, page, typ, *array) # type: ignore + except PdfReadError: + warnings.warn(f"Unknown destination: {title} {array}", PdfReadWarning) + if self.strict: + raise + else: + # create a link to first Page + tmp = self.pages[0].indirect_ref + indirect_ref = NullObject() if tmp is None else tmp + return Destination( + title, indirect_ref, TextStringObject("/Fit") # type: ignore + ) def _build_outline(self, node: DictionaryObject) -> Optional[Destination]: dest, title, outline = None, None, None - # title required for valid outline - try: - title = node["/Title"] - except KeyError: - raise PdfReadError(f"Invalid Outline Entry, Missing /Title attribute: {node!r}") - - if "/A" in node: + if "/A" in node and "/Title" in node: # Action, section 8.5 (only type GoTo supported) + title = node["/Title"] action = cast(DictionaryObject, node["/A"]) action_type = cast(NameObject, action[GoToActionArguments.S]) if action_type == "/GoTo": dest = action[GoToActionArguments.D] - elif "/Dest" in node: + elif "/Dest" in node and "/Title" in node: # Destination, section 8.2.1 + title = node["/Title"] dest = node["/Dest"] - if isinstance(dest, ArrayObject): - outline = self._build_destination(title, dest) # type: ignore - elif isinstance(dest, str): - # named destination - try: - outline = self._get_named_destinations()[dest] + # if destination found, then create outline + if dest: + if isinstance(dest, ArrayObject): + outline = self._build_destination(title, dest) # type: ignore + elif isinstance(dest, str) and dest in self._namedDests: + outline = self._namedDests[dest] outline[NameObject("/Title")] = title # type: ignore - except KeyError: - # named destination not found in Name Dict - outline = self._build_destination(title, None) - elif isinstance(dest, type(None)): - # outline not required to have destination or GoTo action - # Table 8.4 - outline = self._build_destination(title, dest) # type: ignore - else: - raise PdfReadError(f"Unexpected destination {dest!r}") - - # if color or text format specifications present, add to outline - if "/C" in node: - # Color of outline in (R, G, B) with values ranging 0.0-1.0 - outline[NameObject("/C")] = node["/C"] - if "/F" in node: - # specifies style characteristics bold and/or italic - # 1=italic, 2=bold, 3=both - outline[NameObject("/F")] = node["/F"] + else: + raise PdfReadError(f"Unexpected destination {dest!r}") + + # if outline created, add color and format if present + if outline: + if "/C" in node: + # Color of outline in (R, G, B) with values ranging 0.0-1.0 + outline[NameObject("/C")] = ArrayObject(FloatObject(c) for c in node["/C"]) # type: ignore + if "/F" in node: + # specifies style characteristics bold and/or italic + # 1=italic, 2=bold, 3=both + outline[NameObject("/F")] = node["/F"] return outline diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index bc9f7e48ad..e8a9d8d466 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -49,7 +49,6 @@ cast, ) from enum import IntFlag - from ._codecs import ( # noqa: rev_encoding _pdfdoc_encoding, _pdfdoc_encoding_rev, @@ -1733,9 +1732,9 @@ def additionalActions(self) -> Optional[DictionaryObject]: # pragma: no cover return self.additional_actions -class OutlineItemFlag(IntFlag): +class OutlineFontFlag(IntFlag): """ - A class used as an enumerable flag for formatting outline font + A class used as an enumerable flag for formatting an outline font """ italic = 1 @@ -1892,10 +1891,10 @@ def bottom(self) -> Optional[FloatObject]: @property def color(self) -> Optional[tuple]: """Read-only property accessing the color in (R, G, B) with values 0.0-1.0""" - return self.get("/C", (0.0, 0.0, 0.0)) + return self.get("/C", [FloatObject(0), FloatObject(0), FloatObject(0)]) @property - def fmt(self) -> Optional[OutlineItemFlag]: + def font_format(self) -> Optional[OutlineFontFlag]: """Read-only property accessing the font type. 1=italic, 2=bold, 3=both""" return self.get("/F", 0) diff --git a/tests/test_reader.py b/tests/test_reader.py index a605fc708a..56bc2a70b0 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -858,3 +858,17 @@ def test_header(src, pdf_header): reader = PdfReader(src) assert reader.pdf_header == pdf_header + + +def test_outline_color(): + url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf" + name = "tika-924546.pdf" + reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) + assert reader.outlines[0].color == [0, 0, 1] + + +def test_outline_font_format(): + url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf" + name = "tika-924546.pdf" + reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) + assert reader.outlines[0].font_format == 2