Skip to content

Commit

Permalink
Merge branch 'main' into ExtractText
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma authored Jun 13, 2022
2 parents 534a8bb + 9c4e7f5 commit d92597a
Show file tree
Hide file tree
Showing 17 changed files with 118 additions and 51 deletions.
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ indent=' '
multi_line_output=3
length_sort=0
include_trailing_comma=True
known_third_party = dataclasses,pytest,setuptools
known_third_party = pytest,setuptools
8 changes: 2 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# pre-commit run --all-files
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
rev: v4.3.0
hooks:
- id: check-ast
- id: check-byte-order-marker
Expand All @@ -24,10 +24,6 @@ repos:
# rev: v0.942
# hooks:
# - id: mypy
- repo: https://github.com/asottile/seed-isort-config
rev: v2.2.0
hooks:
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
hooks:
Expand All @@ -48,7 +44,7 @@ repos:
- id: blacken-docs
additional_dependencies: [black==22.1.0]
- repo: https://github.com/asottile/pyupgrade
rev: v2.31.0
rev: v2.34.0
hooks:
- id: pyupgrade
args: [--py36-plus]
34 changes: 34 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,37 @@
Version 2.1.1, 2022-06-12
-------------------------

New Features (ENH):
- Add support for pathlib as input for PdfReader (#979)

Performance Improvements (PI):
- Optimize read_next_end_line (#646)

Bug Fixes (BUG):
- Adobe Acrobat 'Would you like to save this file?' (#970)

Documentation (DOC):
- Notes on annotations (#982)
- Who uses PyPDF2
- intendet ➔ in robustness page (#958)

Maintenance (MAINT):
- pre-commit / requirements.txt updates (#977)
- Mark read_next_end_line as deprecated (#965)
- Export `PageObject` in PyPDF2 root (#960)

Testing (TST):
- Add MCVE of issue #416 (#980)
- FlateDecode.decode decodeParms (#964)
- Xmp module (#962)
- utils.paeth_predictor (#959)

Code Style (STY):
- Use more tuples and list/dict comprehensions (#976)

Full Changelog: https://github.com/py-pdf/PyPDF2/compare/2.1.0...2.1.1


Version 2.1.0, 2022-06-06
-------------------------

Expand Down
16 changes: 8 additions & 8 deletions PyPDF2/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,13 +549,13 @@ def _merge_page(
def _expand_mediabox(
self, page2: "PageObject", ctm: Optional[CompressedTransformationMatrix]
) -> None:
corners1 = [
corners1 = (
self.mediabox.left.as_numeric(),
self.mediabox.bottom.as_numeric(),
self.mediabox.right.as_numeric(),
self.mediabox.top.as_numeric(),
]
corners2 = [
)
corners2 = (
page2.mediabox.left.as_numeric(),
page2.mediabox.bottom.as_numeric(),
page2.mediabox.left.as_numeric(),
Expand All @@ -564,17 +564,17 @@ def _expand_mediabox(
page2.mediabox.top.as_numeric(),
page2.mediabox.right.as_numeric(),
page2.mediabox.bottom.as_numeric(),
]
)
if ctm is not None:
ctm = tuple(float(x) for x in ctm) # type: ignore[assignment]
new_x = [
new_x = tuple(
ctm[0] * corners2[i] + ctm[2] * corners2[i + 1] + ctm[4]
for i in range(0, 8, 2)
]
new_y = [
)
new_y = tuple(
ctm[1] * corners2[i] + ctm[3] * corners2[i + 1] + ctm[5]
for i in range(0, 8, 2)
]
)
else:
new_x = corners2[0:8:2]
new_y = corners2[1:8:2]
Expand Down
14 changes: 7 additions & 7 deletions PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import warnings
from hashlib import md5
from io import BytesIO
from pathlib import Path
from typing import (
Any,
Callable,
Expand Down Expand Up @@ -234,7 +235,7 @@ class PdfReader:

def __init__(
self,
stream: StrByteType,
stream: Union[StrByteType, Path],
strict: bool = False,
password: Union[None, str, bytes] = None,
) -> None:
Expand All @@ -251,7 +252,7 @@ def __init__(
"It may not be read correctly.",
PdfReadWarning,
)
if isinstance(stream, str):
if isinstance(stream, (str, Path)):
with open(stream, "rb") as fh:
stream = BytesIO(b_(fh.read()))
self.read(stream)
Expand Down Expand Up @@ -525,7 +526,7 @@ def _check_kids(
self.get_fields(kid.get_object(), retval, fileobj)

def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
order = ["/TM", "/T", "/FT", PA.PARENT, "/TU", "/Ff", "/V", "/DV"]
order = ("/TM", "/T", "/FT", PA.PARENT, "/TU", "/Ff", "/V", "/DV")
for attr in order:
attr_name = field_attributes[attr]
try:
Expand Down Expand Up @@ -701,10 +702,9 @@ def _get_page_number_by_indirect(
) -> int:
"""Generate _page_id2num"""
if self._page_id2num is None:
id2num = {}
for i, x in enumerate(self.pages):
id2num[x.indirect_ref.idnum] = i # type: ignore
self._page_id2num = id2num
self._page_id2num = {
x.indirect_ref.idnum: i for i, x in enumerate(self.pages) # type: ignore
}

if indirect_ref is None or isinstance(indirect_ref, NullObject):
return -1
Expand Down
2 changes: 1 addition & 1 deletion PyPDF2/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.1.0"
__version__ = "2.1.1"
14 changes: 7 additions & 7 deletions PyPDF2/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ def __init__(self) -> None:
)
self._root: Optional[IndirectObject] = None
self._root_object = root
self.set_need_appearances_writer()

def _add_object(self, obj: Optional[PdfObject]) -> IndirectObject:
self._objects.append(obj)
Expand Down Expand Up @@ -539,6 +538,7 @@ def update_page_form_field_values(
second bit sets Required, the third bit sets NoExport. See
PDF Reference Table 8.70 for details.
"""
self.set_need_appearances_writer()
# Iterate through pages, update field values
for j in range(len(page[PG.ANNOTS])): # type: ignore
writer_annot = page[PG.ANNOTS][j].get_object() # type: ignore
Expand Down Expand Up @@ -1209,7 +1209,7 @@ def remove_images(self, ignore_byte_string_object: bool = False) -> None:
"""
pg_dict = cast(DictionaryObject, self.get_object(self._pages))
pages = cast(ArrayObject, pg_dict[PA.KIDS])
jump_operators = [
jump_operators = (
b_("cm"),
b_("w"),
b_("J"),
Expand All @@ -1235,7 +1235,7 @@ def remove_images(self, ignore_byte_string_object: bool = False) -> None:
b_("B"),
b_("Do"),
b_("sh"),
]
)
for j in range(len(pages)):
page = pages[j]
page_ref = cast(DictionaryObject, self.get_object(page))
Expand Down Expand Up @@ -1539,15 +1539,15 @@ def addLink( # pragma: no cover
deprecate_with_replacement("addLink", "add_link")
return self.add_link(pagenum, pagedest, rect, border, fit, *args)

_valid_layouts = [
_valid_layouts = (
"/NoLayout",
"/SinglePage",
"/OneColumn",
"/TwoColumnLeft",
"/TwoColumnRight",
"/TwoPageLeft",
"/TwoPageRight",
]
)

def _get_page_layout(self) -> Optional[LayoutType]:
try:
Expand Down Expand Up @@ -1656,14 +1656,14 @@ def pageLayout(self, layout: LayoutType) -> None: # pragma: no cover
deprecate_with_replacement("pageLayout", "page_layout")
self.page_layout = layout

_valid_modes = [
_valid_modes = (
"/UseNone",
"/UseOutlines",
"/UseThumbs",
"/FullScreen",
"/UseOC",
"/UseAttachments",
]
)

def _get_page_mode(self) -> Optional[PagemodeType]:
try:
Expand Down
4 changes: 2 additions & 2 deletions PyPDF2/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ class CatalogDictionary:
NEEDS_RENDERING = "/NeedsRendering" # boolean, optional


PDF_KEYS = [
PDF_KEYS = (
PagesAttributes,
PageAttributes,
Ressources,
Expand All @@ -302,4 +302,4 @@ class CatalogDictionary:
Core,
TrailerKeys,
CatalogAttributes,
]
)
4 changes: 2 additions & 2 deletions PyPDF2/papersizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class PaperSize:
C4 = Dimensions(649, 918)


_din_a = [
_din_a = (
PaperSize.A0,
PaperSize.A1,
PaperSize.A2,
Expand All @@ -45,4 +45,4 @@ class PaperSize:
PaperSize.A6,
PaperSize.A7,
PaperSize.A8,
]
)
15 changes: 15 additions & 0 deletions docs/user/reading-pdf-annotations.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,21 @@ PDF 1.7 defines 25 different annotation types:
* Watermark
* 3D

In general, annotations can be read like this:

```python
from PyPDF2 import PdfReader

reader = PdfReader("commented.pdf")

for page in reader.pages:
if "/Annots" in page:
for annot in page["/Annots"]:
obj = annot.get_object()
annotation = {"subtype": obj["/Subtype"], "location": obj["/Rect"]}
print(annotation)
```

Reading the most common ones is described here.

## Text
Expand Down
6 changes: 3 additions & 3 deletions make_changelog.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""Internal tool to update the changelog."""

import subprocess
from dataclasses import dataclass
from datetime import datetime
from typing import List

from dataclasses import dataclass


@dataclass(frozen=True)
class Change:
Expand Down Expand Up @@ -63,7 +62,7 @@ def get_formatted_changes(git_tag: str) -> str:
grouped[commit.prefix].append({"msg": commit.message})

# Order prefixes
order = ["DEP", "ENH", "BUG", "ROB", "DOC", "DEV", "MAINT", "TST", "STY"]
order = ["DEP", "ENH", "PI", "BUG", "ROB", "DOC", "DEV", "MAINT", "TST", "STY"]
abbrev2long = {
"DEP": "Deprecations",
"ENH": "New Features",
Expand All @@ -74,6 +73,7 @@ def get_formatted_changes(git_tag: str) -> str:
"MAINT": "Maintenance",
"TST": "Testing",
"STY": "Code Style",
"PI": "Performance Improvements",
}

# Create output
Expand Down
10 changes: 5 additions & 5 deletions requirements/ci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ iniconfig==1.1.1
# via pytest
mccabe==0.6.1
# via flake8
more-itertools==8.12.0
more-itertools==8.13.0
# via flake8-implicit-str-concat
mypy==0.950
mypy==0.961
# via -r requirements/ci.in
mypy-extensions==0.4.3
# via mypy
Expand All @@ -48,7 +48,7 @@ pycodestyle==2.8.0
# via flake8
pyflakes==2.4.0
# via flake8
pyparsing==3.0.8
pyparsing==3.0.9
# via packaging
pytest==7.0.1
# via
Expand All @@ -60,11 +60,11 @@ tomli==1.2.3
# via
# mypy
# pytest
typed-ast==1.5.3
typed-ast==1.5.4
# via mypy
typeguard==2.13.3
# via -r requirements/ci.in
types-pillow==9.0.14
types-pillow==9.0.19
# via -r requirements/ci.in
typing-extensions==4.1.1
# via
Expand Down
10 changes: 5 additions & 5 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ black==22.3.0
# via -r requirements/dev.in
bleach==4.1.0
# via readme-renderer
certifi==2021.10.8
certifi==2022.5.18.1
# via requests
cffi==1.15.0
# via cryptography
Expand All @@ -26,7 +26,7 @@ colorama==0.4.4
# via twine
coverage[toml]==6.2
# via pytest-cov
cryptography==37.0.1
cryptography==37.0.2
# via secretstorage
dataclasses==0.8
# via black
Expand Down Expand Up @@ -77,7 +77,7 @@ pep517==0.12.0
# via pip-tools
pip-tools==6.4.0
# via -r requirements/dev.in
pkginfo==1.8.2
pkginfo==1.8.3
# via twine
platformdirs==2.4.0
# via
Expand All @@ -93,7 +93,7 @@ pycparser==2.21
# via cffi
pygments==2.12.0
# via readme-renderer
pyparsing==3.0.8
pyparsing==3.0.9
# via packaging
pytest==7.0.1
# via pytest-cov
Expand Down Expand Up @@ -129,7 +129,7 @@ tqdm==4.64.0
# via twine
twine==3.8.0
# via -r requirements/dev.in
typed-ast==1.5.3
typed-ast==1.5.4
# via black
typing-extensions==4.1.1
# via
Expand Down
Loading

0 comments on commit d92597a

Please sign in to comment.