Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma authored Jul 13, 2022
2 parents 5959cf7 + af5a0c3 commit 5715653
Show file tree
Hide file tree
Showing 14 changed files with 527 additions and 173 deletions.
40 changes: 40 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,45 @@
# CHANGELOG

## Version 2.5.0, 2022-07-10

### New Features (ENH)
- Add support for indexed color spaces / BitsPerComponent for decoding PNGs (#1067)
- Add PageObject._get_fonts (#1083)

### Performance Improvements (PI)
- Use iterative DFS in PdfWriter._sweep_indirect_references (#1072)

### Bug Fixes (BUG)
- Let Page.scale also scale the crop-/trim-/bleed-/artbox (#1066)
- Column default for CCITTFaxDecode (#1079)

### Robustness (ROB)
- Guard against None-value in _get_outlines (#1060)

### Documentation (DOC)
- Stamps and watermarks (#1082)
- OCR vs PDF text extraction (#1081)
- Python Version support
- Formatting of CHANGELOG

### Developer Experience (DEV)
- Cache downloaded files (#1070)
- Speed-up for CI (#1069)

### Maintenance (MAINT)
- Set page.rotate(angle: int) (#1092)
- Issue #416 was fixed by #1015 (#1078)

### Testing (TST)
- Image extraction (#1080)
- Image extraction (#1077)

### Code Style (STY)
- Apply black
- Typo in Changelog

Full Changelog: https://github.com/py-pdf/PyPDF2/compare/2.4.2...2.5.0

## Version 2.4.2, 2022-07-05

### New Features (ENH)
Expand Down
53 changes: 49 additions & 4 deletions PyPDF2/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
Iterator,
List,
Optional,
Set,
Tuple,
Union,
cast,
Expand Down Expand Up @@ -297,7 +298,7 @@ def createBlankPage(
deprecate_with_replacement("createBlankPage", "create_blank_page")
return PageObject.create_blank_page(pdf, width, height)

def rotate(self, angle: float) -> "PageObject":
def rotate(self, angle: int) -> "PageObject":
"""
Rotate a page clockwise by increments of 90 degrees.
Expand All @@ -313,11 +314,11 @@ def rotate(self, angle: float) -> "PageObject":
self[NameObject(PG.ROTATE)] = NumberObject(current_angle + angle)
return self

def rotate_clockwise(self, angle: float) -> "PageObject": # pragma: no cover
def rotate_clockwise(self, angle: int) -> "PageObject":  # pragma: no cover
    """
    Deprecated spelling of :meth:`rotate`.

    Calls ``deprecate_with_replacement("rotate_clockwise", "rotate")`` and
    then forwards ``angle`` unchanged to :meth:`rotate`.

    :param angle: Passed straight through to :meth:`rotate`.
    :return: Whatever :meth:`rotate` returns.
    """
    deprecate_with_replacement("rotate_clockwise", "rotate")
    rotated = self.rotate(angle)
    return rotated

def rotateClockwise(self, angle: float) -> "PageObject": # pragma: no cover
def rotateClockwise(self, angle: int) -> "PageObject": # pragma: no cover
"""
.. deprecated:: 1.28.0
Expand All @@ -326,7 +327,7 @@ def rotateClockwise(self, angle: float) -> "PageObject": # pragma: no cover
deprecate_with_replacement("rotateClockwise", "rotate")
return self.rotate(angle)

def rotateCounterClockwise(self, angle: float) -> "PageObject": # pragma: no cover
def rotateCounterClockwise(self, angle: int) -> "PageObject": # pragma: no cover
"""
.. deprecated:: 1.28.0
Expand Down Expand Up @@ -1447,6 +1448,18 @@ def extractText(
deprecate_with_replacement("extractText", "extract_text")
return self.extract_text(Tj_sep=Tj_sep, TJ_sep=TJ_sep)

def _get_fonts(self) -> Tuple[Set[str], Set[str]]:
    """
    Report the fonts found under this page's ``/Resources`` entry,
    split into embedded and unembedded names.

    The walk over the resources is delegated to ``_get_fonts_walk``; the
    unembedded set is every used font name that is not in the embedded set.

    :return: (Set of embedded fonts, set of unembedded fonts)
    """
    page_dict = self.get_object()
    assert isinstance(page_dict, DictionaryObject)
    resources = cast(DictionaryObject, page_dict["/Resources"])
    used, embedded = _get_fonts_walk(resources)
    return embedded, used - embedded

mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
"""
A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units,
Expand Down Expand Up @@ -1595,3 +1608,35 @@ def __getitem__(self, index: int) -> PageObject:
def __iter__(self) -> Iterator[PageObject]:
    """
    Yield the items of this sequence in index order.

    The length is read once when iteration starts; each item is then
    fetched lazily through ``self[index]``.
    """
    item_count = len(self)
    yield from (self[index] for index in range(item_count))


def _get_fonts_walk(
    obj: DictionaryObject,
    fnt: Optional[Set[str]] = None,
    emb: Optional[Set[str]] = None,
) -> Tuple[Set[str], Set[str]]:
    """
    Collect the names of used and embedded fonts from a nested dictionary object.

    If there is a key called 'BaseFont', that is a font that is used in the document.
    If there is a key called 'FontName' and another key in the same dictionary object
    that is called 'FontFilex' (where x is empty, 2, or 3), then that fontname is
    embedded.

    The structure is traversed iteratively with an explicit stack and every
    dictionary is visited at most once, so reference cycles (for example a
    child pointing back to one of its ancestors) and very deep nesting cannot
    hit Python's recursion limit.

    :param obj: Root object to search. Anything without a ``keys`` attribute
        (here and at any depth) is ignored.
    :param fnt: Optional set that the used font names are added to.
    :param emb: Optional set that the embedded font names are added to.
    :return: (fonts used, fonts embedded). These are the very sets passed as
        ``fnt`` and ``emb`` when given, otherwise newly created sets.
    """
    if fnt is None:
        fnt = set()
    if emb is None:
        emb = set()
    fontkeys = ("/FontFile", "/FontFile2", "/FontFile3")

    seen_ids: Set[int] = set()
    # Hold a reference to every visited node: id() values are only unique
    # among objects that are alive at the same time.
    visited: List[object] = []
    stack: List[object] = [obj]
    while stack:
        node = cast("DictionaryObject", stack.pop())
        if not hasattr(node, "keys") or id(node) in seen_ids:
            continue
        seen_ids.add(id(node))
        visited.append(node)

        if "/BaseFont" in node:
            fnt.add(cast(str, node["/BaseFont"]))
        # A font name only counts as embedded if a font file sits next to it.
        if "/FontName" in node and any(key in node for key in fontkeys):
            emb.add(cast(str, node["/FontName"]))

        stack.extend(node[key] for key in node.keys())

    return fnt, emb  # return the sets for each page
2 changes: 1 addition & 1 deletion PyPDF2/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.4.2"
__version__ = "2.5.0"
Loading

0 comments on commit 5715653

Please sign in to comment.