Merge branch 'main' into main

py-pdf · Jul 13, 2022 · 5715653 · 5715653
2 parents 5959cf7 + af5a0c3
commit 5715653
Show file tree

Hide file tree

Showing 14 changed files with 527 additions and 173 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,45 @@
 # CHANGELOG
 
+## Version 2.5.0, 2022-07-10
+
+### New Features (ENH)
+-  Add support for indexed color spaces / BitsPerComponent for decoding PNGs (#1067)
+-  Add PageObject._get_fonts (#1083)
+
+### Performance Improvements (PI)
+-  Use iterative DFS in PdfWriter._sweep_indirect_references (#1072)
+
+### Bug Fixes (BUG)
+-  Let Page.scale also scale the crop-/trim-/bleed-/artbox (#1066)
+-  Column default for CCITTFaxDecode (#1079)
+
+### Robustness (ROB)
+-  Guard against None-value in _get_outlines (#1060)
+
+### Documentation (DOC)
+-  Stamps and watermarks (#1082)
+-  OCR vs PDF text extraction (#1081)
+-  Python Version support
+-  Formatting of CHANGELOG
+
+### Developer Experience (DEV)
+-  Cache downloaded files (#1070)
+-  Speed-up for CI (#1069)
+
+### Maintenance (MAINT)
+-  Set page.rotate(angle: int) (#1092)
+-  Issue #416 was fixed by #1015 (#1078)
+
+### Testing (TST)
+-  Image extraction (#1080)
+-  Image extraction (#1077)
+
+### Code Style (STY)
+-  Apply black
+-  Typo in Changelog
+
+Full Changelog: https://github.com/py-pdf/PyPDF2/compare/2.4.2...2.5.0
+
 ## Version 2.4.2, 2022-07-05
 
 ### New Features (ENH)

diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
@@ -39,6 +39,7 @@
     Iterator,
     List,
     Optional,
+    Set,
     Tuple,
     Union,
     cast,
@@ -297,7 +298,7 @@ def createBlankPage(
         deprecate_with_replacement("createBlankPage", "create_blank_page")
         return PageObject.create_blank_page(pdf, width, height)
 
-    def rotate(self, angle: float) -> "PageObject":
+    def rotate(self, angle: int) -> "PageObject":
         """
         Rotate a page clockwise by increments of 90 degrees.
 
@@ -313,11 +314,11 @@ def rotate(self, angle: float) -> "PageObject":
         self[NameObject(PG.ROTATE)] = NumberObject(current_angle + angle)
         return self
 
-    def rotate_clockwise(self, angle: float) -> "PageObject":  # pragma: no cover
+    def rotate_clockwise(self, angle: int) -> "PageObject":  # pragma: no cover
         deprecate_with_replacement("rotate_clockwise", "rotate")
         return self.rotate(angle)
 
-    def rotateClockwise(self, angle: float) -> "PageObject":  # pragma: no cover
+    def rotateClockwise(self, angle: int) -> "PageObject":  # pragma: no cover
         """
         .. deprecated:: 1.28.0
 
@@ -326,7 +327,7 @@ def rotateClockwise(self, angle: float) -> "PageObject":  # pragma: no cover
         deprecate_with_replacement("rotateClockwise", "rotate")
         return self.rotate(angle)
 
-    def rotateCounterClockwise(self, angle: float) -> "PageObject":  # pragma: no cover
+    def rotateCounterClockwise(self, angle: int) -> "PageObject":  # pragma: no cover
         """
         .. deprecated:: 1.28.0
 
@@ -1447,6 +1448,18 @@ def extractText(
         deprecate_with_replacement("extractText", "extract_text")
         return self.extract_text(Tj_sep=Tj_sep, TJ_sep=TJ_sep)
 
+    def _get_fonts(self) -> Tuple[Set[str], Set[str]]:
+        """
+        Get the font names found by walking this page's "/Resources" entry.
+
+        :return: (Set of embedded fonts, set of unembedded fonts)
+        """
+        obj = self.get_object()
+        assert isinstance(obj, DictionaryObject)
+        fonts, embedded = _get_fonts_walk(cast(DictionaryObject, obj["/Resources"]))
+        unembedded = fonts - embedded  # seen as /BaseFont, never as an embedded /FontName
+        return embedded, unembedded
+
     mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
     """
     A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
@@ -1595,3 +1608,35 @@ def __getitem__(self, index: int) -> PageObject:
     def __iter__(self) -> Iterator[PageObject]:
         for i in range(len(self)):
             yield self[i]
+
+
+def _get_fonts_walk(
+    obj: DictionaryObject,
+    fnt: Optional[Set[str]] = None,
+    emb: Optional[Set[str]] = None,
+) -> Tuple[Set[str], Set[str]]:
+    """
+    Recursively walk ``obj`` and collect font names into ``fnt`` and ``emb``.
+
+    A '/BaseFont' value is recorded in ``fnt`` (fonts used). A '/FontName' value
+    is recorded in ``emb`` (fonts embedded) when the same dictionary also has a
+    '/FontFile', '/FontFile2' or '/FontFile3' key. Both sets are updated in
+    place and returned; values without a ``keys`` attribute are not descended.
+    """
+    if fnt is None:
+        fnt = set()
+    if emb is None:
+        emb = set()
+    if not hasattr(obj, "keys"):
+        return set(), set()  # not dict-like; recursive callers ignore this value
+    fontkeys = ("/FontFile", "/FontFile2", "/FontFile3")
+    if "/BaseFont" in obj:
+        fnt.add(cast(str, obj["/BaseFont"]))
+    if "/FontName" in obj:
+        if [x for x in fontkeys if x in obj]:  # test to see if there is FontFile
+            emb.add(cast(str, obj["/FontName"]))
+
+    for key in obj.keys():  # NOTE(review): no cycle guard; assumes acyclic tree - confirm
+        _get_fonts_walk(cast(DictionaryObject, obj[key]), fnt, emb)
+
+    return fnt, emb  # return the sets for each page
diff --git a/PyPDF2/_version.py b/PyPDF2/_version.py
@@ -1 +1 @@
-__version__ = "2.4.2"
+__version__ = "2.5.0"