DEV: Use correct pytest markers (#1407)

It was tested like this: ## slow $ pytest --durations=50 -m "not slow" ## external Disable the internet connection, delete the `tests/pdf_cache` directory and run $ pytest -m "not external" ## samples Remove the sample-files directory and run $ pytest -m "not samples"
py-pdf · Oct 29, 2022 · 4b8d849 · 4b8d849
1 parent b49034e
commit 4b8d849
Show file tree

Hide file tree

Showing 13 changed files with 152 additions and 24 deletions.
diff --git a/docs/dev/intro.md b/docs/dev/intro.md
@@ -11,15 +11,7 @@ pip install -r requirements/dev.txt
 
 ## Running Tests
 
-```
-pytest .
-```
-
-We have the following pytest markers defined:
-
-* `external`: Tests which use files from [the `sample-files` git submodule](https://github.com/py-pdf/sample-files)
-
-You can locally choose not to run those via `pytest -m "not external"`.
+See [testing PyPDF2 with pytest](testing.html)
 
 ## The sample-files git submodule
 The reason for having the submodule `sample-files` is that we want to keep

diff --git a/docs/dev/testing.md b/docs/dev/testing.md
@@ -2,6 +2,19 @@
 
 PyPDF2 uses [`pytest`](https://docs.pytest.org/en/7.1.x/) for testing.
 
+## De-selecting groups of tests
+
+PyPDF2 makes use of the following pytest markers:
+
+* `slow`: Tests that require more than 5 seconds
+* `samples`: Tests that require [the `sample-files` git submodule](https://github.com/py-pdf/sample-files) to be initialized. As of October 2022, this is about 25 MB.
+* `external`: Tests that download PDF documents. They are stored locally and thus only need to be downloaded once. As of October 2022, this is about 200 MB.
+
+You can disable them by `pytest -m "not external"` or `pytest -m "not samples"`.
+You can even disable all of them: `pytest -m "not external and not samples and not slow"`.
+
+Please note that this reduces test coverage. The CI will always test all files.
+
 ## Creating a Coverage Report
 
 If you want to get a coverage report that considers the Python version specific

diff --git a/pyproject.toml b/pyproject.toml
@@ -57,7 +57,9 @@ exclude = [".github/*", "docs/*", "resources/*", "sample-files/*", "sample-files
 [tool.pytest.ini_options]
 filterwarnings = ["error"]
 markers = [
-    "external: Tests which use files from https://github.com/py-pdf/sample-files",
+    "slow: Tests which require more than a second",
+    "samples: Tests which use files from https://github.com/py-pdf/sample-files",
+    "external: Tests which need to download files"
 ]
 testpaths = ["tests"]
 norecursedirs = ["tests/pdf_cache"]

diff --git a/tests/test_cmap.py b/tests/test_cmap.py
@@ -8,6 +8,8 @@
 from . import get_pdf_from_url
 
 
+@pytest.mark.external
+@pytest.mark.slow
 def test_compute_space_width():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/923/923406.pdf"
     name = "tika-923406.pdf"
@@ -17,6 +19,8 @@ def test_compute_space_width():
         page.extract_text()
 
 
+@pytest.mark.external
+@pytest.mark.slow
 def test_parse_to_unicode_process_rg():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/959/959173.pdf"
     name = "tika-959173.pdf"
@@ -30,6 +34,7 @@ def test_parse_to_unicode_process_rg():
         page.extract_text()
 
 
+@pytest.mark.external
 def test_parse_encoding_advanced_encoding_not_implemented():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/957/957144.pdf"
     name = "tika-957144.pdf"
@@ -40,6 +45,7 @@ def test_parse_encoding_advanced_encoding_not_implemented():
             page.extract_text()
 
 
+@pytest.mark.external
 def test_get_font_width_from_default():  # L40
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/908/908104.pdf"
     name = "tika-908104.pdf"
@@ -48,6 +54,7 @@ def test_get_font_width_from_default():  # L40
         page.extract_text()
 
 
+@pytest.mark.external
 def test_multiline_bfrange():
     # non regression test for iss_1285
     url = "https://github.com/alexanderquispe/1REI05/raw/main/reports/report_1/The%20lean%20times%20in%20the%20Peruvian%20economy.pdf"
@@ -62,6 +69,7 @@ def test_multiline_bfrange():
         page.extract_text()
 
 
+@pytest.mark.external
 def test_bfchar_on_2_chars():
     # iss #1293
     url = "https://github.com/xyegithub/myBlog/raw/main/posts/c94b2364/paper_pdfs/ImageClassification/2007%2CASurveyofImageClassificationBasedTechniques.pdf"
@@ -71,6 +79,7 @@ def test_bfchar_on_2_chars():
         page.extract_text()
 
 
+@pytest.mark.external
 def test_ascii_charset():
     # iss #1312
     url = "https://github.com/py-pdf/PyPDF2/files/9472500/main.pdf"
@@ -79,13 +88,15 @@ def test_ascii_charset():
     assert "/a" not in reader.pages[0].extract_text()
 
 
+@pytest.mark.external
 def test_iss1370():
     url = "https://github.com/py-pdf/PyPDF2/files/9667138/cmap1370.pdf"
     name = "cmap1370.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     reader.pages[0].extract_text()
 
 
+@pytest.mark.external
 def test_iss1379():
     url = "https://github.com/py-pdf/PyPDF2/files/9712729/02voc.pdf"
     name = "02voc.pdf"

diff --git a/tests/test_filters.py b/tests/test_filters.py
@@ -200,6 +200,7 @@ def test_CCITTFaxDecode():
     )
 
 
+@pytest.mark.external
 @patch("PyPDF2._reader.logger_warning")
 def test_decompress_zlib_error(mock_logger_warning):
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/952/952445.pdf"
@@ -212,6 +213,7 @@ def test_decompress_zlib_error(mock_logger_warning):
     )
 
 
+@pytest.mark.external
 def test_lzw_decode_neg1():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/921/921632.pdf"
     name = "tika-921632.pdf"
@@ -222,13 +224,15 @@ def test_lzw_decode_neg1():
     assert exc.value.args[0] == "Missed the stop code in LZWDecode!"
 
 
+@pytest.mark.external
 def test_issue_399():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/976/976970.pdf"
     name = "tika-976970.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     reader.pages[1].extract_text()
 
 
+@pytest.mark.external
 def test_image_without_imagemagic():
     with patch.dict(sys.modules):
         sys.modules["PIL"] = None

diff --git a/tests/test_generic.py b/tests/test_generic.py
@@ -595,6 +595,7 @@ def test_remove_child_in_tree():
     tree.empty_tree()
 
 
+@pytest.mark.external
 def test_dict_read_from_stream(caplog):
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/984/984877.pdf"
     name = "tika-984877.pdf"
@@ -608,6 +609,7 @@ def test_dict_read_from_stream(caplog):
     )
 
 
+@pytest.mark.external
 def test_parse_content_stream_peek_percentage():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/985/985770.pdf"
     name = "tika-985770.pdf"
@@ -617,6 +619,7 @@ def test_parse_content_stream_peek_percentage():
         page.extract_text()
 
 
+@pytest.mark.external
 def test_read_inline_image_no_has_q():
     # pdf/df7e1add3156af17a372bc165e47a244.pdf
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/998/998719.pdf"
@@ -627,6 +630,7 @@ def test_read_inline_image_no_has_q():
         page.extract_text()
 
 
+@pytest.mark.external
 def test_read_inline_image_loc_neg_1():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/935/935066.pdf"
     name = "tika-935066.pdf"
@@ -636,6 +640,8 @@ def test_read_inline_image_loc_neg_1():
         page.extract_text()
 
 
+@pytest.mark.slow
+@pytest.mark.external
 def test_text_string_write_to_stream():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924562.pdf"
     name = "tika-924562.pdf"
@@ -645,6 +651,7 @@ def test_text_string_write_to_stream():
         page.compress_content_streams()
 
 
+@pytest.mark.external
 def test_name_object_read_from_stream_unicode_error():  # L588
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/974/974966.pdf"
     name = "tika-974966.pdf"
@@ -654,6 +661,7 @@ def test_name_object_read_from_stream_unicode_error():  # L588
         page.extract_text()
 
 
+@pytest.mark.external
 def test_bool_repr(tmp_path):
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/932/932449.pdf"
     name = "tika-932449.pdf"
@@ -673,6 +681,7 @@ def test_bool_repr(tmp_path):
     )
 
 
+@pytest.mark.external
 @patch("PyPDF2._reader.logger_warning")
 def test_issue_997(mock_logger_warning):
     url = "https://github.com/py-pdf/PyPDF2/files/8908874/Exhibit_A-2_930_Enterprise_Zone_Tax_Credits_final.pdf"

diff --git a/tests/test_merger.py b/tests/test_merger.py
@@ -214,6 +214,7 @@ def test_merge_write_closed_fh():
     assert exc.value.args[0] == err_closed
 
 
+@pytest.mark.external
 def test_trim_outline_list():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/995/995175.pdf"
     name = "tika-995175.pdf"
@@ -227,6 +228,7 @@ def test_trim_outline_list():
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
 def test_zoom():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/994/994759.pdf"
     name = "tika-994759.pdf"
@@ -240,6 +242,7 @@ def test_zoom():
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
 def test_zoom_xyz_no_left():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/933/933322.pdf"
     name = "tika-933322.pdf"
@@ -253,6 +256,7 @@ def test_zoom_xyz_no_left():
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
 def test_outline_item():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/997/997511.pdf"
     name = "tika-997511.pdf"
@@ -266,6 +270,8 @@ def test_outline_item():
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
+@pytest.mark.slow
 def test_trim_outline():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/982/982336.pdf"
     name = "tika-982336.pdf"
@@ -279,6 +285,8 @@ def test_trim_outline():
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
+@pytest.mark.slow
 def test1():
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/923/923621.pdf"
     name = "tika-923621.pdf"
@@ -292,6 +300,8 @@ def test1():
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
+@pytest.mark.slow
 def test_sweep_recursion1():
     # TODO: This test looks like an infinite loop.
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf"
@@ -309,6 +319,8 @@ def test_sweep_recursion1():
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
+@pytest.mark.slow
 @pytest.mark.parametrize(
     ("url", "name"),
     [
@@ -337,6 +349,7 @@ def test_sweep_recursion2(url, name):
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
 def test_sweep_indirect_list_newobj_is_None(caplog):
     url = "https://corpora.tika.apache.org/base/docs/govdocs1/906/906769.pdf"
     name = "tika-906769.pdf"
@@ -354,6 +367,7 @@ def test_sweep_indirect_list_newobj_is_None(caplog):
     os.remove("tmp-merger-do-not-commit.pdf")
 
 
+@pytest.mark.external
 def test_iss1145():
     # issue with FitH destination with null param
     url = "https://github.com/py-pdf/PyPDF2/files/9164743/file-0.pdf"
@@ -382,6 +396,7 @@ def test_deprecate_bookmark_decorator_output():
     assert merger.outline[0].title == first_oi_title
 
 
+@pytest.mark.external
 def test_iss1344(caplog):
     url = "https://github.com/py-pdf/PyPDF2/files/9549001/input.pdf"
     name = "iss1344.pdf"