Skip to content

Commit

Permalink
ENH: Add color and font_format to PdfReader.outlines[i] (py-pdf#1104)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mtd91429 committed Jul 15, 2022
1 parent 3a8a78f commit 94f1d69
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 60 deletions.
93 changes: 38 additions & 55 deletions PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
DictionaryObject,
EncodedStreamObject,
Field,
FloatObject,
IndirectObject,
NameObject,
NullObject,
Expand Down Expand Up @@ -797,74 +798,56 @@ def _build_destination(
title: str,
array: List[Union[NumberObject, IndirectObject, NullObject, DictionaryObject]],
) -> Destination:
page, typ = None, None

if isinstance(array, type(None)):
# handle outlines without destination
page = NullObject()
typ = TextStringObject("/Fit")
return Destination(title, page, typ)
else:
page, typ = array[0:2]
array = array[2:]
try:
return Destination(title, page, typ, *array) # type: ignore
except PdfReadError:
warnings.warn(f"Unknown destination: {title} {array}", PdfReadWarning)
if self.strict:
raise
else:
# create a link to first Page
tmp = self.pages[0].indirect_ref
indirect_ref = NullObject() if tmp is None else tmp
return Destination(
title, indirect_ref, TextStringObject("/Fit") # type: ignore
)
page, typ = array[0:2]
array = array[2:]
try:
return Destination(title, page, typ, *array) # type: ignore
except PdfReadError:
warnings.warn(f"Unknown destination: {title} {array}", PdfReadWarning)
if self.strict:
raise
else:
# create a link to first Page
tmp = self.pages[0].indirect_ref
indirect_ref = NullObject() if tmp is None else tmp
return Destination(
title, indirect_ref, TextStringObject("/Fit") # type: ignore
)

def _build_outline(self, node: DictionaryObject) -> Optional[Destination]:
dest, title, outline = None, None, None

# title required for valid outline
try:
title = node["/Title"]
except KeyError:
raise PdfReadError(f"Invalid Outline Entry, Missing /Title attribute: {node!r}")

if "/A" in node:
if "/A" in node and "/Title" in node:
# Action, section 8.5 (only type GoTo supported)
title = node["/Title"]
action = cast(DictionaryObject, node["/A"])
action_type = cast(NameObject, action[GoToActionArguments.S])
if action_type == "/GoTo":
dest = action[GoToActionArguments.D]
elif "/Dest" in node:
elif "/Dest" in node and "/Title" in node:
# Destination, section 8.2.1
title = node["/Title"]
dest = node["/Dest"]

if isinstance(dest, ArrayObject):
outline = self._build_destination(title, dest) # type: ignore
elif isinstance(dest, str):
# named destination
try:
outline = self._get_named_destinations()[dest]
# if destination found, then create outline
if dest:
if isinstance(dest, ArrayObject):
outline = self._build_destination(title, dest) # type: ignore
elif isinstance(dest, str) and dest in self._namedDests:
outline = self._namedDests[dest]
outline[NameObject("/Title")] = title # type: ignore
except KeyError:
# named destination not found in Name Dict
outline = self._build_destination(title, None)
elif isinstance(dest, type(None)):
# outline not required to have destination or GoTo action
# Table 8.4
outline = self._build_destination(title, dest) # type: ignore
else:
raise PdfReadError(f"Unexpected destination {dest!r}")

# if color or text format specifications present, add to outline
if "/C" in node:
# Color of outline in (R, G, B) with values ranging 0.0-1.0
outline[NameObject("/C")] = node["/C"]
if "/F" in node:
# specifies style characteristics bold and/or italic
# 1=italic, 2=bold, 3=both
outline[NameObject("/F")] = node["/F"]
else:
raise PdfReadError(f"Unexpected destination {dest!r}")

# if outline created, add color and format if present
if outline:
if "/C" in node:
# Color of outline in (R, G, B) with values ranging 0.0-1.0
outline[NameObject("/C")] = ArrayObject(FloatObject(c) for c in node["/C"]) # type: ignore
if "/F" in node:
# specifies style characteristics bold and/or italic
# 1=italic, 2=bold, 3=both
outline[NameObject("/F")] = node["/F"]

return outline

Expand Down
9 changes: 4 additions & 5 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
cast,
)
from enum import IntFlag

from ._codecs import ( # noqa: rev_encoding
_pdfdoc_encoding,
_pdfdoc_encoding_rev,
Expand Down Expand Up @@ -1733,9 +1732,9 @@ def additionalActions(self) -> Optional[DictionaryObject]: # pragma: no cover
return self.additional_actions


class OutlineItemFlag(IntFlag):
class OutlineFontFlag(IntFlag):
"""
A class used as an enumerable flag for formatting outline font
A class used as an enumerable flag for formatting an outline font
"""

italic = 1
Expand Down Expand Up @@ -1892,10 +1891,10 @@ def bottom(self) -> Optional[FloatObject]:
@property
def color(self) -> Optional[tuple]:
"""Read-only property accessing the color in (R, G, B) with values 0.0-1.0"""
return self.get("/C", (0.0, 0.0, 0.0))
return self.get("/C", [FloatObject(0), FloatObject(0), FloatObject(0)])

@property
def fmt(self) -> Optional[OutlineItemFlag]:
def font_format(self) -> Optional[OutlineFontFlag]:
"""Read-only property accessing the font type. 1=italic, 2=bold, 3=both"""
return self.get("/F", 0)

Expand Down
14 changes: 14 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,3 +858,17 @@ def test_header(src, pdf_header):
reader = PdfReader(src)

assert reader.pdf_header == pdf_header


def test_outline_color():
# Fetch the sample PDF through get_pdf_from_url, open it with PdfReader, and
# check that the first outline entry reports the color [0, 0, 1].
# The color is an (R, G, B) triple with values 0.0-1.0, so this is pure blue.
# NOTE(review): `name` is presumably a local cache filename for the download;
# confirm against get_pdf_from_url.
url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf"
name = "tika-924546.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
assert reader.outlines[0].color == [0, 0, 1]


def test_outline_font_format():
# Fetch the sample PDF through get_pdf_from_url, open it with PdfReader, and
# check that the first outline entry reports font_format == 2.
# Font format values are 1=italic, 2=bold, 3=both, so this entry is bold.
# NOTE(review): `name` is presumably a local cache filename for the download;
# confirm against get_pdf_from_url.
url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf"
name = "tika-924546.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
assert reader.outlines[0].font_format == 2

0 comments on commit 94f1d69

Please sign in to comment.