Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add decrypt support for V5 and AES-128, AES-256 (R5 only) #749

Merged
merged 27 commits into from
Jun 19, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
721b5b6
decrypt support V4 and AES-128
exiledkingcc Apr 12, 2022
2f8e33a
fix and update test
exiledkingcc Apr 15, 2022
26af6ea
add pure python AES
exiledkingcc Apr 15, 2022
46e1da3
FIX: allow use owner password to decrypt
exiledkingcc Apr 16, 2022
970125e
FIX: merge encrypted pdf
exiledkingcc Apr 16, 2022
47cfbd5
decrypt support V=5 and R=5, which uses AES-256
exiledkingcc Apr 18, 2022
28cd603
Merge branch 'master' into encryption
exiledkingcc Apr 18, 2022
84532bb
remove AES code for easier maintaining
exiledkingcc Apr 21, 2022
87bf5b1
add pycryptodome to `extras_require`
exiledkingcc Apr 21, 2022
0348842
Merge branch 'master' into encryption
exiledkingcc Apr 24, 2022
fa439c3
allow decrypt password to be bytes
exiledkingcc Apr 24, 2022
63788f6
Merge branch 'master' into encryption
exiledkingcc Apr 25, 2022
1425f65
Merge branch 'master' into encryption
exiledkingcc Apr 27, 2022
ba45481
Merge branch 'master' into encryption
exiledkingcc May 5, 2022
2cfece2
make flake8 happy
exiledkingcc May 5, 2022
042410b
tag test_encryption with 'no_py27'
exiledkingcc May 5, 2022
24c8242
Merge branch 'master' into encryption
exiledkingcc Jun 11, 2022
cc50897
update for flake8
exiledkingcc Jun 11, 2022
30bd9ca
Merge branch 'master' into encryption
exiledkingcc Jun 12, 2022
c2b90bb
Merge branch 'main' into encryption
MartinThoma Jun 14, 2022
320920d
Merge branch 'main' into encryption
MartinThoma Jun 14, 2022
45de13f
Merge branch 'main' into encryption
MartinThoma Jun 16, 2022
49125b5
Merge branch 'main' into encryption
MartinThoma Jun 16, 2022
e201761
Merge branch 'main' into encryption
MartinThoma Jun 17, 2022
d628afe
Merge branch 'main' into encryption
MartinThoma Jun 19, 2022
6172a51
Add pragma no-cover to base class
MartinThoma Jun 19, 2022
8cc4a89
Merge branch 'main' into encryption
MartinThoma Jun 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
827 changes: 827 additions & 0 deletions PyPDF2/_aes.py

Large diffs are not rendered by default.

721 changes: 721 additions & 0 deletions PyPDF2/encryption.py

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions PyPDF2/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
# it is a PdfFileReader, copy that reader's stream into a
# BytesIO (or StreamIO) stream.
# If fileobj is none of the above types, it is not modified
decryption_key = None
_encryption = None
if isString(fileobj):
fileobj = file(fileobj, 'rb')
my_file = True
Expand All @@ -125,8 +125,8 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
fileobj = StreamIO(filecontent)
my_file = True
elif isinstance(fileobj, PdfFileReader):
if hasattr(fileobj, '_decryption_key'):
decryption_key = fileobj._decryption_key
if hasattr(fileobj, '_encryption'):
_encryption = fileobj._encryption
orig_tell = fileobj.stream.tell()
fileobj.stream.seek(0)
filecontent = StreamIO(fileobj.stream.read())
Expand All @@ -137,8 +137,8 @@ def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=T
# Create a new PdfFileReader instance using the stream
# (either file or BytesIO or StringIO) created above
pdfr = PdfFileReader(fileobj, strict=self.strict, overwriteWarnings=self.overwriteWarnings)
if decryption_key is not None:
pdfr._decryption_key = decryption_key
if _encryption is not None:
pdfr._encryption = _encryption

# Find the range of pages to merge.
if pages is None:
Expand Down
88 changes: 14 additions & 74 deletions PyPDF2/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1740,17 +1740,9 @@ def getObject(self, indirectReference):

# override encryption is used for the /Encrypt dictionary
if not self._override_encryption and self.isEncrypted:
# if we don't have the encryption key:
if not hasattr(self, '_decryption_key'):
raise PdfReadError("file has not been decrypted")
# otherwise, decrypt here...
pack1 = struct.pack("<i", indirectReference.idnum)[:3]
pack2 = struct.pack("<i", indirectReference.generation)[:2]
key = self._decryption_key + pack1 + pack2
assert len(key) == (len(self._decryption_key) + 5)
md5_hash = md5(key).digest()
key = md5_hash[:min(16, len(self._decryption_key) + 5)]
retval = self._decryptObject(retval, key)
if not hasattr(self, "_encryption"):
raise utils.PdfReadError("file has not been decrypted")
retval = self._encryption.decryptObject(retval, indirectReference.idnum, indirectReference.generation)
else:
warnings.warn("Object %d %d not defined."%(indirectReference.idnum,
indirectReference.generation), PdfReadWarning)
Expand Down Expand Up @@ -2143,69 +2135,17 @@ def decode_permissions(self, permissions_code):
return permissions

def _decrypt(self, password):
# Decrypts data as per Section 3.5 (page 117) of PDF spec v1.7
# "The security handler defines the use of encryption and decryption in
# the document, using the rules specified by the CF, StmF, and StrF entries"
encrypt = self.trailer[TK.ENCRYPT].getObject()
# /Encrypt Keys:
# Filter (name) : "name of the preferred security handler "
# V (number) : Algorithm Code
# Length (integer): Length of encryption key, in bits
# CF (dictionary) : Crypt filter
# StmF (name) : Name of the crypt filter that is used by default when decrypting streams
# StrF (name) : The name of the crypt filter that is used when decrypting all strings in the document
# R (number) : Standard security handler revision number
# U (string) : A 32-byte string, based on the user password
# P (integer) : Permissions allowed with user access
if encrypt['/Filter'] != '/Standard':
raise NotImplementedError("only Standard PDF encryption handler is available")
if not (encrypt['/V'] in (1, 2)):
raise NotImplementedError("only algorithm code 1 and 2 are supported. This PDF uses code %s" % encrypt['/V'])
user_password, key = self._authenticateUserPassword(password)
if user_password:
self._decryption_key = key
return 1
else:
rev = encrypt['/R'].getObject()
if rev == 2:
keylen = 5
else:
keylen = encrypt[SA.LENGTH].getObject() // 8
key = _alg33_1(password, rev, keylen)
real_O = encrypt["/O"].getObject()
if rev == 2:
userpass = utils.RC4_encrypt(key, real_O)
else:
val = real_O
for i in range(19, -1, -1):
new_key = b_('')
for l in range(len(key)):
new_key += b_(chr(utils.ord_(key[l]) ^ i))
val = utils.RC4_encrypt(new_key, val)
userpass = val
owner_password, key = self._authenticateUserPassword(userpass)
if owner_password:
self._decryption_key = key
return 2
return 0

def _authenticateUserPassword(self, password):
encrypt = self.trailer[TK.ENCRYPT].getObject()
rev = encrypt['/R'].getObject()
owner_entry = encrypt['/O'].getObject()
p_entry = encrypt['/P'].getObject()
id_entry = self.trailer[TK.ID].getObject()
id1_entry = id_entry[0].getObject()
real_U = encrypt['/U'].getObject().original_bytes
if rev == 2:
U, key = _alg34(password, owner_entry, p_entry, id1_entry)
elif rev >= 3:
U, key = _alg35(password, rev,
encrypt[SA.LENGTH].getObject() // 8, owner_entry,
p_entry, id1_entry,
encrypt.get("/EncryptMetadata", BooleanObject(False)).getObject())
U, real_U = U[:16], real_U[:16]
return U == real_U, key
from PyPDF2.encryption import Encryption
id_entry = self.trailer.get(TK.ID)
id1_entry = id_entry[0].getObject().original_bytes if id_entry else b""
encrypt_entry = self.trailer[TK.ENCRYPT].getObject()
encryption = Encryption.read(encrypt_entry, id1_entry)
# maybe password is owner password
# TODO: add/modify api to set owner password
rr = encryption.verify(password, password)
if rr > 0:
self._encryption = encryption
return rr

def getIsEncrypted(self):
return TK.ENCRYPT in self.trailer
Expand Down
Binary file added Resources/encryption/enc0.pdf
Binary file not shown.
Binary file added Resources/encryption/enc1.pdf
Binary file not shown.
Binary file added Resources/encryption/enc2.pdf
Binary file not shown.
Binary file added Resources/encryption/enc3.pdf
Binary file not shown.
Binary file added Resources/encryption/enc4.pdf
Binary file not shown.
Binary file added Resources/encryption/enc5.pdf
Binary file not shown.
Binary file added Resources/encryption/enc6.pdf
Binary file not shown.
Binary file added Resources/encryption/enc7.pdf
Binary file not shown.
Binary file added Resources/encryption/enc8.pdf
Binary file not shown.
Binary file added Resources/encryption/enc9.pdf
Binary file not shown.
Binary file added Resources/encryption/enca.pdf
Binary file not shown.
Binary file added Resources/encryption/encb.pdf
Binary file not shown.
84 changes: 84 additions & 0 deletions Tests/test_encryption.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os
import pytest
import PyPDF2

# Absolute path of the directory that contains this test module.
TESTS_ROOT = os.path.dirname(os.path.abspath(__file__))
# The project root is the parent of the tests directory.
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
# Fixture files used by the tests live under "<project root>/Resources".
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")


@pytest.mark.parametrize(
    "src",
    [
        # unencrypted pdf
        os.path.join(RESOURCE_ROOT, "encryption", "enc0.pdf"),

        # created by `qpdf --encrypt "" "" 40 -- enc0.pdf enc1.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc1.pdf"),
        # created by `qpdf --encrypt "" "" 128 -- enc0.pdf enc2.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc2.pdf"),
        # created by `qpdf --encrypt "asdfzxcv" "" 40 -- enc0.pdf enc3.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc3.pdf"),
        # created by `qpdf --encrypt "asdfzxcv" "" 128 -- enc0.pdf enc4.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc4.pdf"),

        # V=4 and AES128
        # created by `qpdf --encrypt "asdfzxcv" "" 128 --force-V4 -- enc0.pdf enc5.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc5.pdf"),
        # created by `qpdf --encrypt "asdfzxcv" "" 128 --use-aes=y -- enc0.pdf enc6.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc6.pdf"),

        # V=5 and R=5 use AES-256
        # created by `qpdf --encrypt "" "" 256 --force-R5 -- enc0.pdf enc7.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc7.pdf"),
        # created by `qpdf --encrypt "asdfzxcv" "" 256 --force-R5 -- enc0.pdf enc8.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc8.pdf"),
        # created by `qpdf --encrypt "" "asdfzxcv" 256 --force-R5 -- enc0.pdf enc9.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enc9.pdf"),

        # asdfzxcv is owner password
        # created by `qpdf --encrypt "" "asdfzxcv" 128 --use-aes=y -- enc0.pdf enca.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "enca.pdf"),
        # created by `qpdf --encrypt "1234" "asdfzxcv" 128 --use-aes=y -- enc0.pdf encb.pdf`
        os.path.join(RESOURCE_ROOT, "encryption", "encb.pdf"),
    ],
)
def test_encryption(src):
    """Open one fixture PDF, decrypt it if needed, and check its contents.

    Every fixture is derived from ``enc0.pdf`` (see the qpdf commands in the
    parametrize list), so each one must report a single page and the same
    non-empty document-info entries once it has been decrypted.

    :param src: path of the fixture PDF to read.
    """
    with open(src, "rb") as inputfile:
        ipdf = PyPDF2.PdfFileReader(inputfile)
        if src.endswith("enc0.pdf"):
            # enc0.pdf is the only unencrypted fixture.
            assert not ipdf.isEncrypted
        else:
            assert ipdf.isEncrypted
            ipdf.decrypt("asdfzxcv")
        assert ipdf.getNumPages() == 1
        metadict = ipdf.getDocumentInfo()
        # Drop entries whose value is empty so that only populated
        # metadata fields take part in the comparison.
        dd = {key: value for key, value in dict(metadict).items() if value}
        assert dd == {
            '/Author': 'cheng',
            '/CreationDate': "D:20220414132421+05'24'",
            '/Creator': 'WPS Writer',
            '/ModDate': "D:20220414132421+05'24'",
            '/SourceModified': "D:20220414132421+05'24'",
            '/Trapped': '/False',
        }


@pytest.mark.parametrize(
    "names",
    [
        ["enc0.pdf", "enc4.pdf", "enc5.pdf", "enc6.pdf"],
    ],
)
def test_encryption_merge(names):
    """Append a mix of plain and encrypted fixture PDFs to one merger.

    :param names: file names of fixtures inside ``Resources/encryption``.
    """
    merger = PyPDF2.PdfFileMerger()
    fixture_dir = os.path.join(RESOURCE_ROOT, "encryption")

    # Build every reader up front, before anything is appended.
    readers = []
    for name in names:
        readers.append(PyPDF2.PdfFileReader(os.path.join(fixture_dir, name)))

    for reader in readers:
        # Encrypted inputs are decrypted before they are handed to the merger.
        if reader.isEncrypted:
            reader.decrypt("asdfzxcv")
        merger.append(reader)

    # no need to write to file
    merger.close()