Skip to content

Commit

Permalink
ENH: Add PdfReader.pdf_header (#1013)
Browse files Browse the repository at this point in the history
The new attribute returns the first 8 bytes of the PDF file, decoded as a string. This is typically something like `"%PDF-1.4"`.
It can be used to get the PDF version of the file, or at least the version the file claims to have.
  • Loading branch information
MartinThoma authored Jun 26, 2022
1 parent a8c4dd9 commit 3c750c1
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
8 changes: 8 additions & 0 deletions PyPDF2/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,14 @@ def __init__(
if password is not None and self.decrypt(password) == 0:
raise PdfReadError("Wrong password")

@property
def pdf_header(self) -> str:
    """
    The first 8 bytes of the underlying stream, decoded as text.

    This is typically something like ``'%PDF-1.4'`` and can be used to
    read the PDF version the file claims to have.

    The stream position is restored before returning. Bytes that are not
    valid UTF-8 are rendered as backslash escapes instead of raising
    ``UnicodeDecodeError``, so a file with a damaged header can still be
    inspected.

    :return: The decoded header; shorter than 8 characters (possibly
        empty) if the stream holds fewer than 8 bytes.
    """
    loc = self.stream.tell()
    try:
        self.stream.seek(0, 0)
        header = self.stream.read(8)
    finally:
        # Put the stream back where it was, even if reading failed.
        self.stream.seek(loc, 0)
    # The header is attacker/corruption-controlled input: never let a stray
    # byte turn a simple attribute access into a UnicodeDecodeError.
    return header.decode("utf-8", "backslashreplace")

@property
def metadata(self) -> Optional[DocumentInformation]:
"""
Expand Down
13 changes: 13 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,16 @@ def test_unexpected_destination():
with pytest.raises(PdfReadError) as exc:
merger.append(reader)
assert exc.value.args[0] == "Unexpected destination '/1'"


@pytest.mark.parametrize(
    "src,pdf_header",
    [
        (os.path.join(RESOURCE_ROOT, filename), expected)
        for filename, expected in (
            ("attachment.pdf", "%PDF-1.5"),
            ("crazyones.pdf", "%PDF-1.5"),
        )
    ],
)
def test_header(src, pdf_header):
    """Check that ``PdfReader.pdf_header`` matches the expected header for each sample file."""
    actual_header = PdfReader(src).pdf_header
    assert actual_header == pdf_header

0 comments on commit 3c750c1

Please sign in to comment.