From 596e628531d28c5f86c1a4d47f8f19999666720d Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 17:21:26 +0200 Subject: [PATCH 01/13] TST: No pycryptodome --- .github/workflows/github-ci.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 576c64e6f..6bbc16b57 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.10.3"] steps: - name: Checkout Code @@ -38,6 +38,10 @@ jobs: - name: Install requirements (Python 3) run: | pip install -r requirements/ci.txt + - name: Remove cryptodome + run: | + pip uninstall pycryptodome -y + if: matrix.python-version == '3.10.3' - name: Install PyPDF2 run: | pip install . From 60904ebe669c4fd45edca8d4de44cc2156724153 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 17:37:28 +0200 Subject: [PATCH 02/13] Adjust tests --- tests/test_encryption.py | 46 ++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index c357fc237..056408f55 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -3,6 +3,14 @@ import pytest import PyPDF2 +from PyPDF2.errors import DependencyError + +try: + from Crypto.Cipher import AES + + HAS_PYCRYPTODOME = True +except ImportError: + HAS_PYCRYPTODOME = False TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) PROJECT_ROOT = os.path.dirname(TESTS_ROOT) @@ -10,37 +18,37 @@ @pytest.mark.parametrize( - "name", + ("name", "requres_pycryptodome"), [ # unencrypted pdf - "unencrypted.pdf", + ("unencrypted.pdf", False), # created by `qpdf --encrypt "" "" 40 -- unencrypted.pdf r2-empty-password.pdf` - "r2-empty-password.pdf", + ("r2-empty-password.pdf", False), # created by `qpdf --encrypt "" "" 128 -- unencrypted.pdf r3-empty-password.pdf` - "r3-empty-password.pdf", + ("r3-empty-password.pdf", False), # created by `qpdf --encrypt "asdfzxcv" "" 40 -- unencrypted.pdf r2-user-password.pdf` - "r2-user-password.pdf", + ("r2-user-password.pdf", False), # created by `qpdf --encrypt "asdfzxcv" "" 128 -- unencrypted.pdf r3-user-password.pdf` - "r3-user-password.pdf", + ("r3-user-password.pdf", False), # created by `qpdf --encrypt "asdfzxcv" "" 128 --force-V4 -- unencrypted.pdf r4-user-password.pdf` - "r4-user-password.pdf", + ("r4-user-password.pdf", False), # created by `qpdf --encrypt "asdfzxcv" "" 128 --use-aes=y -- unencrypted.pdf r4-aes-user-password.pdf` - "r4-aes-user-password.pdf", + ("r4-aes-user-password.pdf", True), # # created by `qpdf --encrypt "" "" 256 --force-R5 -- unencrypted.pdf r5-empty-password.pdf` - "r5-empty-password.pdf", + ("r5-empty-password.pdf", True), # # created by `qpdf --encrypt "asdfzxcv" "" 256 --force-R5 -- unencrypted.pdf r5-user-password.pdf` - "r5-user-password.pdf", + ("r5-user-password.pdf", True), # # created by `qpdf --encrypt "" "asdfzxcv" 256 --force-R5 -- unencrypted.pdf r5-owner-password.pdf` - "r5-owner-password.pdf", + ("r5-owner-password.pdf", True), # created by `qpdf --encrypt "" "" 256 -- unencrypted.pdf r6-empty-password.pdf` - "r6-empty-password.pdf", + ("r6-empty-password.pdf", True), # created by `qpdf --encrypt "asdfzxcv" "" 256 -- unencrypted.pdf r6-user-password.pdf` - "r6-user-password.pdf", + ("r6-user-password.pdf", True), # created by `qpdf --encrypt "" "asdfzxcv" 256 -- unencrypted.pdf r6-owner-password.pdf` - "r6-owner-password.pdf", + ("r6-owner-password.pdf", True), ], ) -def test_encryption(name): +def test_encryption(name, requres_pycryptodome): inputfile = os.path.join(RESOURCE_ROOT, "encryption", name) ipdf = PyPDF2.PdfReader(inputfile) if inputfile.endswith("unencrypted.pdf"): @@ -49,7 +57,13 @@ def test_encryption(name): assert ipdf.is_encrypted ipdf.decrypt("asdfzxcv") assert len(ipdf.pages) == 1 - dd = dict(ipdf.metadata) + if requres_pycryptodome and not HAS_PYCRYPTODOME: + with pytest.raises(DependencyError) as exc: + dd = dict(ipdf.metadata) + assert exc.value.args[0] == "PyCryptodome is required for AES algorithm" + return + else: + dd = dict(ipdf.metadata) # remove empty value entry dd = {x[0]: x[1] for x in dd.items() if x[1]} assert dd == { From 480dee8fd9f442b34c7de8bd12feae33365f43f8 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 17:38:46 +0200 Subject: [PATCH 03/13] noqa --- tests/test_encryption.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 056408f55..6dfb16a42 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -6,7 +6,7 @@ from PyPDF2.errors import DependencyError try: - from Crypto.Cipher import AES + from Crypto.Cipher import AES # noqa: F401 HAS_PYCRYPTODOME = True except ImportError: From c8a526b3c5bd90db30abddaa1c114919f9a2ac6e Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 17:44:15 +0200 Subject: [PATCH 04/13] Bigger block --- tests/test_encryption.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 6dfb16a42..ea4cc7d33 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -50,19 +50,26 @@ ) def test_encryption(name, requres_pycryptodome): inputfile = os.path.join(RESOURCE_ROOT, "encryption", name) - ipdf = PyPDF2.PdfReader(inputfile) - if inputfile.endswith("unencrypted.pdf"): - assert not ipdf.is_encrypted - else: - assert ipdf.is_encrypted - ipdf.decrypt("asdfzxcv") - assert len(ipdf.pages) == 1 if requres_pycryptodome and not HAS_PYCRYPTODOME: with pytest.raises(DependencyError) as exc: + ipdf = PyPDF2.PdfReader(inputfile) + if inputfile.endswith("unencrypted.pdf"): + assert not ipdf.is_encrypted + else: + assert ipdf.is_encrypted + ipdf.decrypt("asdfzxcv") + assert len(ipdf.pages) == 1 dd = dict(ipdf.metadata) assert exc.value.args[0] == "PyCryptodome is required for AES algorithm" return else: + ipdf = PyPDF2.PdfReader(inputfile) + if inputfile.endswith("unencrypted.pdf"): + assert not ipdf.is_encrypted + else: + assert ipdf.is_encrypted + ipdf.decrypt("asdfzxcv") + assert len(ipdf.pages) == 1 dd = dict(ipdf.metadata) # remove empty value entry dd = {x[0]: x[1] for x in dd.items() if x[1]} From a37a88485be63376da50435c2502146aa501903e Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 17:52:09 +0200 Subject: [PATCH 05/13] Adjust if pycryptodomoe is not present --- tests/test_encryption.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index ea4cc7d33..7a68fb155 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -91,6 +91,8 @@ def test_encryption(name, requres_pycryptodome): ], ) def test_both_password(name, user_passwd, owner_passwd): + if not HAS_PYCRYPTODOME: + return from PyPDF2 import PasswordType inputfile = os.path.join(RESOURCE_ROOT, "encryption", name) @@ -115,6 +117,8 @@ def test_both_password(name, user_passwd, owner_passwd): ], ) def test_encryption_merge(names): + if not HAS_PYCRYPTODOME: + return pdf_merger = PyPDF2.PdfMerger() files = [os.path.join(RESOURCE_ROOT, "encryption", x) for x in names] pdfs = [PyPDF2.PdfReader(x) for x in files] From 0ea9474590039841888ff8d420a95817778bcd58 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 17:57:27 +0200 Subject: [PATCH 06/13] Onre more test --- tests/test_encryption.py | 19 +++++++++++++++++++ tests/test_reader.py | 17 ----------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 7a68fb155..3ee3d53d6 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -103,6 +103,25 @@ def test_both_password(name, user_passwd, owner_passwd): assert len(ipdf.pages) == 1 +@pytest.mark.parametrize( + ("pdffile", "password"), + [ + ("crazyones-encrypted-256.pdf", "password"), + ], +) +def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): + """ + Check if we can read a page of an encrypted file. + + This is a regression test for issue 327: + IndexError for get_page() of decrypted file + """ + if not HAS_PYCRYPTODOME: + return + path = os.path.join(RESOURCE_ROOT, pdffile) + PyPDF2.PdfReader(path, password=password).pages[0] + + @pytest.mark.parametrize( "names", [ diff --git a/tests/test_reader.py b/tests/test_reader.py index 49ec3e79f..6018a196c 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -280,23 +280,6 @@ def test_get_page_of_encrypted_file(pdffile, password, should_fail): PdfReader(path, password=password).pages[0] -@pytest.mark.parametrize( - ("pdffile", "password"), - [ - ("crazyones-encrypted-256.pdf", "password"), - ], -) -def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): - """ - Check if we can read a page of an encrypted file. - - This is a regression test for issue 327: - IndexError for get_page() of decrypted file - """ - path = os.path.join(RESOURCE_ROOT, pdffile) - PdfReader(path, password=password).pages[0] - - @pytest.mark.parametrize( ("src", "expected", "expected_get_fields"), [ From 46fefa6082f028f1dc4301f8567436506c24b5fe Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 18:20:01 +0200 Subject: [PATCH 07/13] Make mypy happy --- PyPDF2/_encryption.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_encryption.py b/PyPDF2/_encryption.py index 621cc2d52..4327baf63 100644 --- a/PyPDF2/_encryption.py +++ b/PyPDF2/_encryption.py @@ -56,7 +56,7 @@ class CryptIdentity(CryptBase): try: - from Crypto.Cipher import AES, ARC4 + from Crypto.Cipher import AES, ARC4 # type: ignore[import] class CryptRC4(CryptBase): def __init__(self, key: bytes) -> None: From fefa6fbc1c80e7e9d4367606bb1db8806fe0a8b5 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 19:50:51 +0200 Subject: [PATCH 08/13] Make non-usage of pycryptodome explicit Co-authored-by: Matthew Peveler --- .github/workflows/github-ci.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 6bbc16b57..734b8113b 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -21,8 +21,11 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.10.3"] - + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] + use-cryptodome: [""] + include: + - python-version: "3.10" + use-cryptodome: "false" steps: - name: Checkout Code uses: actions/checkout@v3 @@ -41,7 +44,7 @@ jobs: - name: Remove cryptodome run: | pip uninstall pycryptodome -y - if: matrix.python-version == '3.10.3' + if: matrix.use-cryptodome == false - name: Install PyPDF2 run: | pip install . From 1c217845412002c02d998cd907bb3e24e0e2cf6c Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 20:38:14 +0200 Subject: [PATCH 09/13] Update tests/test_encryption.py Co-authored-by: Matthew Peveler --- tests/test_encryption.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 3ee3d53d6..cde29d6b6 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -53,13 +53,7 @@ def test_encryption(name, requres_pycryptodome): if requres_pycryptodome and not HAS_PYCRYPTODOME: with pytest.raises(DependencyError) as exc: ipdf = PyPDF2.PdfReader(inputfile) - if inputfile.endswith("unencrypted.pdf"): - assert not ipdf.is_encrypted - else: - assert ipdf.is_encrypted - ipdf.decrypt("asdfzxcv") - assert len(ipdf.pages) == 1 - dd = dict(ipdf.metadata) + ipdf.decrypt("asdfzxcv") assert exc.value.args[0] == "PyCryptodome is required for AES algorithm" return else: From 9d9e7da4e5a96b09d0e8b701d5608816df63f852 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 20:43:02 +0200 Subject: [PATCH 10/13] Access the metadata --- tests/test_encryption.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index cde29d6b6..b93dea4ea 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -54,6 +54,7 @@ def test_encryption(name, requres_pycryptodome): with pytest.raises(DependencyError) as exc: ipdf = PyPDF2.PdfReader(inputfile) ipdf.decrypt("asdfzxcv") + dd = dict(ipdf.metadata) assert exc.value.args[0] == "PyCryptodome is required for AES algorithm" return else: From 4baecaf33d96b3c5c2e0c2543023f802d3ce8e7d Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 20:47:57 +0200 Subject: [PATCH 11/13] Use pytest.skip --- tests/test_encryption.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index b93dea4ea..6eee1d9ea 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -87,7 +87,7 @@ def test_encryption(name, requres_pycryptodome): ) def test_both_password(name, user_passwd, owner_passwd): if not HAS_PYCRYPTODOME: - return + pytest.skip() from PyPDF2 import PasswordType inputfile = os.path.join(RESOURCE_ROOT, "encryption", name) @@ -112,7 +112,7 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): IndexError for get_page() of decrypted file """ if not HAS_PYCRYPTODOME: - return + pytest.skip() path = os.path.join(RESOURCE_ROOT, pdffile) PyPDF2.PdfReader(path, password=password).pages[0] @@ -132,7 +132,7 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): ) def test_encryption_merge(names): if not HAS_PYCRYPTODOME: - return + pytest.skip() pdf_merger = PyPDF2.PdfMerger() files = [os.path.join(RESOURCE_ROOT, "encryption", x) for x in names] pdfs = [PyPDF2.PdfReader(x) for x in files] From 2dc61c17c8263caee268bc4f00db64114420a9cf Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 21:00:10 +0200 Subject: [PATCH 12/13] use skipif decorator --- tests/test_encryption.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 6eee1d9ea..21f08cd71 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -85,9 +85,8 @@ def test_encryption(name, requres_pycryptodome): ("r6-both-passwords.pdf", "foo", "bar"), ], ) +@pytest.mark.skipif(not HAS_PYCRYPTODOME) def test_both_password(name, user_passwd, owner_passwd): - if not HAS_PYCRYPTODOME: - pytest.skip() from PyPDF2 import PasswordType inputfile = os.path.join(RESOURCE_ROOT, "encryption", name) @@ -104,6 +103,7 @@ def test_both_password(name, user_passwd, owner_passwd): ("crazyones-encrypted-256.pdf", "password"), ], ) +@pytest.mark.skipif(not HAS_PYCRYPTODOME) def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): """ Check if we can read a page of an encrypted file. @@ -111,8 +111,6 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): This is a regression test for issue 327: IndexError for get_page() of decrypted file """ - if not HAS_PYCRYPTODOME: - pytest.skip() path = os.path.join(RESOURCE_ROOT, pdffile) PyPDF2.PdfReader(path, password=password).pages[0] @@ -130,9 +128,8 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): ), ], ) +@pytest.mark.skipif(not HAS_PYCRYPTODOME) def test_encryption_merge(names): - if not HAS_PYCRYPTODOME: - pytest.skip() pdf_merger = PyPDF2.PdfMerger() files = [os.path.join(RESOURCE_ROOT, "encryption", x) for x in names] pdfs = [PyPDF2.PdfReader(x) for x in files] From 95dd8fa69926b6524034862ea9537d62c37384dd Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 3 Jul 2022 21:06:17 +0200 Subject: [PATCH 13/13] Add reason --- tests/test_encryption.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 21f08cd71..43ebdf20e 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -85,7 +85,7 @@ def test_encryption(name, requres_pycryptodome): ("r6-both-passwords.pdf", "foo", "bar"), ], ) -@pytest.mark.skipif(not HAS_PYCRYPTODOME) +@pytest.mark.skipif(not HAS_PYCRYPTODOME, reason="No pycryptodome") def test_both_password(name, user_passwd, owner_passwd): from PyPDF2 import PasswordType @@ -103,7 +103,7 @@ def test_both_password(name, user_passwd, owner_passwd): ("crazyones-encrypted-256.pdf", "password"), ], ) -@pytest.mark.skipif(not HAS_PYCRYPTODOME) +@pytest.mark.skipif(not HAS_PYCRYPTODOME, reason="No pycryptodome") def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): """ Check if we can read a page of an encrypted file. @@ -128,7 +128,7 @@ def test_get_page_of_encrypted_file_new_algorithm(pdffile, password): ), ], ) -@pytest.mark.skipif(not HAS_PYCRYPTODOME) +@pytest.mark.skipif(not HAS_PYCRYPTODOME, reason="No pycryptodome") def test_encryption_merge(names): pdf_merger = PyPDF2.PdfMerger() files = [os.path.join(RESOURCE_ROOT, "encryption", x) for x in names]