PKG: Declare Pillow as optional dependency (#1392)

Closes #1390
py-pdf · Oct 13, 2022 · 664ddf2 · 664ddf2
1 parent cc55321
commit 664ddf2
Show file tree

Hide file tree

Showing 8 changed files with 55 additions and 5 deletions.
diff --git a/PyPDF2/_page.py b/PyPDF2/_page.py
@@ -373,6 +373,8 @@ def images(self) -> List[File]:
         """
         Get a list of all images of the page.
 
+        This requires pillow. You can install it via 'pip install PyPDF2[image]'.
+
         For the moment, this does NOT include inline images. They will be added
         in future.
         """

diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py
@@ -564,7 +564,13 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]:
 
     :return: Tuple[file extension, bytes]
     """
-    from PIL import Image
+    try:
+        from PIL import Image
+    except ImportError:
+        raise ImportError(
+            "pillow is required to do image extraction. "
+            "It can be installed via 'pip install PyPDF2[image]'"
+        )
 
     size = (x_object_obj[IA.WIDTH], x_object_obj[IA.HEIGHT])
     data = x_object_obj.get_data()  # type: ignore

diff --git a/docs/user/encryption-decryption.md b/docs/user/encryption-decryption.md
@@ -1,8 +1,7 @@
 # Encryption and Decryption of PDFs
 
-Please see the note in the
-[Installation doc](https://pypdf2.readthedocs.io/en/latest/user/installation.html)
-for installing the extra dependencies if interacting with PDFs that use AES.
+> Please see the note in the [installation guide](installation.html)
+> for installing the extra dependencies if interacting with PDFs that use AES.
 
 ## Encrypt
 

diff --git a/docs/user/extract-images.md b/docs/user/extract-images.md
@@ -1,5 +1,8 @@
 # Extract Images
 
+> Please note: to use the following code, you need to install the optional
+> dependencies; see the [installation guide](installation.html).
+
 Every page of a PDF document can contain an arbitrary amount of images.
 The names of the files may not be unique.
 

diff --git a/docs/user/installation.md b/docs/user/installation.md
@@ -20,6 +20,20 @@ install PyPDF2 for your current user:
 pip install --user PyPDF2
 ```
 
+### Optional dependencies
+
+PyPDF2 tries to be as self-contained as possible, but for some tasks the amount
+of work to properly maintain the code would be too high. This is especially the
+case for cryptography and image formats.
+
+If you simply want to install all optional dependencies, please run:
+
+```
+pip install PyPDF2[full]
+```
+
+Alternatively, you can install just some:
+
 If you plan to use PyPDF2 for encrypting or decrypting PDFs that use AES, you
 will need to install some extra dependencies. Encryption using RC4 is supported
 using the regular installation.
@@ -28,6 +42,11 @@ using the regular installation.
 pip install PyPDF2[crypto]
 ```
 
+If you plan to use image extraction, you need Pillow:
+
+```
+pip install PyPDF2[image]
+```
 
 ## Python Version Support
 

diff --git a/setup.cfg b/setup.cfg
@@ -42,7 +42,9 @@ install_requires =
     dataclasses; python_version < '3.7'
 
 [options.extras_require]
+full = PyCryptodome; Pillow
 crypto = PyCryptodome
+image = Pillow
 
 [mutmut]
 backup = False

diff --git a/tests/test_filters.py b/tests/test_filters.py
@@ -1,4 +1,5 @@
 import string
+import sys
 from io import BytesIO
 from itertools import product as cartesian_product
 from unittest.mock import patch
@@ -226,3 +227,20 @@ def test_issue_399():
     name = "tika-976970.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     reader.pages[1].extract_text()
+
+
+def test_image_without_imagemagic():
+    with patch.dict(sys.modules):
+        sys.modules["PIL"] = None
+        url = "https://corpora.tika.apache.org/base/docs/govdocs1/914/914102.pdf"
+        name = "tika-914102.pdf"
+        data = BytesIO(get_pdf_from_url(url, name=name))
+        reader = PdfReader(data, strict=True)
+
+        for page in reader.pages:
+            with pytest.raises(ImportError) as exc:
+                page.images
+            assert (
+                exc.value.args[0]
+                == "pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'"
+            )
diff --git a/tests/test_reader.py b/tests/test_reader.py
@@ -5,7 +5,6 @@
 from pathlib import Path
 
 import pytest
-from PIL import Image
 
 from PyPDF2 import PdfReader
 from PyPDF2._reader import convert_to_int, convertToInt
@@ -188,6 +187,8 @@ def test_get_outline(src, outline_elements):
     ],
 )
 def test_get_images(src, expected_images):
+    from PIL import Image
+
     src_abs = RESOURCE_ROOT / src
     reader = PdfReader(src_abs)