Skip to content

Commit

Permalink
ENH: Add writer.pdf_header property (getter and setter) (#1038)
Browse files Browse the repository at this point in the history
When writing a PDF, set the header version to the highest PDF version
among the writer's current header and the source documents of the added pages

Closes #162

Co-authored-by: Matthew Peveler <[email protected]>
  • Loading branch information
MartinThoma and MasterOdin authored Jun 29, 2022
1 parent 0215cc7 commit eedf0e0
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 2 deletions.
2 changes: 2 additions & 0 deletions PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ def __init__(

@property
def pdf_header(self) -> str:
# TODO: Make this return a bytes object for consistency
# but that needs a deprecation
loc = self.stream.tell()
self.stream.seek(0, 0)
pdf_file_version = self.stream.read(8).decode("utf-8")
Expand Down
19 changes: 19 additions & 0 deletions PyPDF2/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,25 @@
DEPR_MSG = "{} is deprecated and will be removed in PyPDF2 3.0.0. Use {} instead."


def _get_max_pdf_version_header(header1: bytes, header2: bytes) -> bytes:
versions = (
b"%PDF-1.3",
b"%PDF-1.4",
b"%PDF-1.5",
b"%PDF-1.6",
b"%PDF-1.7",
b"%PDF-2.0",
)
pdf_header_indices = []
if header1 in versions:
pdf_header_indices.append(versions.index(header1))
if header2 in versions:
pdf_header_indices.append(versions.index(header2))
if len(pdf_header_indices) == 0:
raise ValueError(f"neither {header1!r} nor {header2!r} are proper headers")
return versions[max(pdf_header_indices)]


def read_until_whitespace(stream: StreamType, maxchars: Optional[int] = None) -> bytes:
"""
Read non-whitespace characters and return them.
Expand Down
29 changes: 27 additions & 2 deletions PyPDF2/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@
from ._page import PageObject, _VirtualList
from ._reader import PdfReader
from ._security import _alg33, _alg34, _alg35
from ._utils import StreamType, b_, deprecate_with_replacement
from ._utils import (
StreamType,
_get_max_pdf_version_header,
b_,
deprecate_with_replacement,
)
from .constants import CatalogAttributes as CA
from .constants import Core as CO
from .constants import EncryptionDictAttributes as ED
Expand Down Expand Up @@ -127,6 +132,21 @@ def __init__(self) -> None:
self._root: Optional[IndirectObject] = None
self._root_object = root

@property
def pdf_header(self) -> bytes:
    """
    Header of the PDF document that is written.

    This should be something like b'%PDF-1.5'. It is recommended to set the
    lowest version that supports all features which are used within the
    PDF file.

    When assigning, a ``str`` such as ``'%PDF-1.5'`` is also tolerated and
    is stored UTF-8 encoded, so the getter always yields ``bytes`` for
    ``bytes``/``str`` input.
    """
    return self._header

@pdf_header.setter
def pdf_header(self, new_header: bytes) -> None:
    # The header is later concatenated with b"\n" when the file is written,
    # so a str stored here would only fail at write time. Encode it up
    # front instead of keeping a value of the wrong type.
    if isinstance(new_header, str):
        new_header = new_header.encode("utf-8")
    self._header = new_header

def _add_object(self, obj: Optional[PdfObject]) -> IndirectObject:
    """
    Append ``obj`` to the writer's object list and return a reference to it.

    :param obj: The object to store (may be ``None``).
    :return: An ``IndirectObject`` built from the list length after the
        append, ``0`` and this writer.
    """
    stored = self._objects
    stored.append(obj)
    # The length after appending is the 1-based position of the new entry;
    # presumably this serves as the object number with generation 0
    # (confirm against IndirectObject's signature).
    position = len(stored)
    return IndirectObject(position, 0, self)
Expand All @@ -149,6 +169,11 @@ def _add_page(
self, page: PageObject, action: Callable[[Any, IndirectObject], None]
) -> None:
assert page[PA.TYPE] == CO.PAGE
if page.pdf is not None:
other = page.pdf.pdf_header
if isinstance(other, str):
other = other.encode() # type: ignore
self.pdf_header = _get_max_pdf_version_header(self.pdf_header, other) # type: ignore
page[NameObject(PA.PARENT)] = self._pages
page_ind = self._add_object(page)
pages = cast(DictionaryObject, self.get_object(self._pages))
Expand Down Expand Up @@ -739,7 +764,7 @@ def write(self, stream: StreamType) -> None:

def _write_header(self, stream: StreamType) -> List[int]:
object_positions = []
stream.write(self._header + b"\n")
stream.write(self.pdf_header + b"\n")
stream.write(b"%\xE2\xE3\xCF\xD3\n")
for i, obj in enumerate(self._objects):
obj = self._objects[i]
Expand Down
7 changes: 7 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import PyPDF2._utils
from PyPDF2._utils import (
_get_max_pdf_version_header,
mark_location,
matrix_multiply,
read_block_backwards,
Expand Down Expand Up @@ -212,3 +213,9 @@ def test_read_previous_line2():
b"d" * (2 * io.DEFAULT_BUFFER_SIZE),
6,
)


def test_get_max_pdf_version_header():
    """Two unrecognised headers must be rejected with a ValueError."""
    expected_message = "neither b'' nor b'PDF-1.2' are proper headers"
    with pytest.raises(ValueError) as caught:
        _get_max_pdf_version_header(b"", b"PDF-1.2")
    assert caught.value.args[0] == expected_message
12 changes: 12 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,3 +499,15 @@ def test_write_bookmark_on_page_fitv():

# cleanup
os.remove("tmp-merger-do-not-commit.pdf")


def test_pdf_header():
    """Check the writer's default header, its bump on add_page, and the setter."""
    pdf_writer = PdfWriter()
    # A fresh writer starts out with this header.
    assert pdf_writer.pdf_header == b"%PDF-1.3"

    # After adding a page from crazyones.pdf the writer is expected to
    # report the 1.5 header.
    source_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
    source = PdfReader(source_path)
    pdf_writer.add_page(source.pages[0])
    assert pdf_writer.pdf_header == b"%PDF-1.5"

    # An explicit assignment through the setter is returned unchanged.
    explicit_header = b"%PDF-1.6"
    pdf_writer.pdf_header = explicit_header
    assert pdf_writer.pdf_header == explicit_header

0 comments on commit eedf0e0

Please sign in to comment.