Skip to content

Commit

Permalink
DEV: Use correct pytest markers (#1407)
Browse files Browse the repository at this point in the history
It was tested like this:

## slow

$ pytest --durations=50 -m "not slow"

## external

Disable the internet connection, delete the `tests/pdf_cache` directory and run

$ pytest -m "not external"

## samples

Remove the sample-files directory and run

$ pytest -m "not samples"
  • Loading branch information
MartinThoma authored Oct 29, 2022
1 parent b49034e commit 4b8d849
Show file tree
Hide file tree
Showing 13 changed files with 152 additions and 24 deletions.
10 changes: 1 addition & 9 deletions docs/dev/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,7 @@ pip install -r requirements/dev.txt

## Running Tests

```
pytest .
```

We have the following pytest markers defined:

* `external`: Tests which use files from [the `sample-files` git submodule](https://github.com/py-pdf/sample-files)

You can locally choose not to run those via `pytest -m "not external"`.
See [testing PyPDF2 with pytest](testing.html)

## The sample-files git submodule
The reason for having the submodule `sample-files` is that we want to keep
Expand Down
13 changes: 13 additions & 0 deletions docs/dev/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@

PyPDF2 uses [`pytest`](https://docs.pytest.org/en/7.1.x/) for testing.

## De-selecting groups of tests

PyPDF2 makes use of the following pytest markers:

* `slow`: Tests that require more than 5 seconds
* `samples`: Tests that require the [`sample-files` git submodule](https://github.com/py-pdf/sample-files) to be initialized. As of October 2022, this is about 25 MB.
* `external`: Tests that download PDF documents. They are stored locally and thus only need to be downloaded once. As of October 2022, this is about 200 MB.

You can disable them by `pytest -m "not external"` or `pytest -m "not samples"`.
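You can even disable all of them with a single marker expression: `pytest -m "not external and not samples and not slow"`. (Passing `-m` several times does not combine the expressions; only the last one is used.)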

Please note that this reduces test coverage. The CI will always test all files.

## Creating a Coverage Report

If you want to get a coverage report that considers the Python version specific
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ exclude = [".github/*", "docs/*", "resources/*", "sample-files/*", "sample-files
[tool.pytest.ini_options]
filterwarnings = ["error"]
markers = [
"external: Tests which use files from https://github.com/py-pdf/sample-files",
"slow: Tests which require more than a second",
"samples: Tests which use files from https://github.com/py-pdf/sample-files",
"external: Tests which need to download files"
]
testpaths = ["tests"]
norecursedirs = ["tests/pdf_cache"]
Expand Down
11 changes: 11 additions & 0 deletions tests/test_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from . import get_pdf_from_url


@pytest.mark.external
@pytest.mark.slow
def test_compute_space_width():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/923/923406.pdf"
name = "tika-923406.pdf"
Expand All @@ -17,6 +19,8 @@ def test_compute_space_width():
page.extract_text()


@pytest.mark.external
@pytest.mark.slow
def test_parse_to_unicode_process_rg():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/959/959173.pdf"
name = "tika-959173.pdf"
Expand All @@ -30,6 +34,7 @@ def test_parse_to_unicode_process_rg():
page.extract_text()


@pytest.mark.external
def test_parse_encoding_advanced_encoding_not_implemented():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/957/957144.pdf"
name = "tika-957144.pdf"
Expand All @@ -40,6 +45,7 @@ def test_parse_encoding_advanced_encoding_not_implemented():
page.extract_text()


@pytest.mark.external
def test_get_font_width_from_default(): # L40
url = "https://corpora.tika.apache.org/base/docs/govdocs1/908/908104.pdf"
name = "tika-908104.pdf"
Expand All @@ -48,6 +54,7 @@ def test_get_font_width_from_default(): # L40
page.extract_text()


@pytest.mark.external
def test_multiline_bfrange():
# non regression test for iss_1285
url = "https://github.com/alexanderquispe/1REI05/raw/main/reports/report_1/The%20lean%20times%20in%20the%20Peruvian%20economy.pdf"
Expand All @@ -62,6 +69,7 @@ def test_multiline_bfrange():
page.extract_text()


@pytest.mark.external
def test_bfchar_on_2_chars():
# iss #1293
url = "https://github.com/xyegithub/myBlog/raw/main/posts/c94b2364/paper_pdfs/ImageClassification/2007%2CASurveyofImageClassificationBasedTechniques.pdf"
Expand All @@ -71,6 +79,7 @@ def test_bfchar_on_2_chars():
page.extract_text()


@pytest.mark.external
def test_ascii_charset():
# iss #1312
url = "https://github.com/py-pdf/PyPDF2/files/9472500/main.pdf"
Expand All @@ -79,13 +88,15 @@ def test_ascii_charset():
assert "/a" not in reader.pages[0].extract_text()


@pytest.mark.external
def test_iss1370():
url = "https://github.com/py-pdf/PyPDF2/files/9667138/cmap1370.pdf"
name = "cmap1370.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
reader.pages[0].extract_text()


@pytest.mark.external
def test_iss1379():
url = "https://github.com/py-pdf/PyPDF2/files/9712729/02voc.pdf"
name = "02voc.pdf"
Expand Down
4 changes: 4 additions & 0 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ def test_CCITTFaxDecode():
)


@pytest.mark.external
@patch("PyPDF2._reader.logger_warning")
def test_decompress_zlib_error(mock_logger_warning):
url = "https://corpora.tika.apache.org/base/docs/govdocs1/952/952445.pdf"
Expand All @@ -212,6 +213,7 @@ def test_decompress_zlib_error(mock_logger_warning):
)


@pytest.mark.external
def test_lzw_decode_neg1():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/921/921632.pdf"
name = "tika-921632.pdf"
Expand All @@ -222,13 +224,15 @@ def test_lzw_decode_neg1():
assert exc.value.args[0] == "Missed the stop code in LZWDecode!"


@pytest.mark.external
def test_issue_399():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/976/976970.pdf"
name = "tika-976970.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
reader.pages[1].extract_text()


@pytest.mark.external
def test_image_without_imagemagic():
with patch.dict(sys.modules):
sys.modules["PIL"] = None
Expand Down
9 changes: 9 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,7 @@ def test_remove_child_in_tree():
tree.empty_tree()


@pytest.mark.external
def test_dict_read_from_stream(caplog):
url = "https://corpora.tika.apache.org/base/docs/govdocs1/984/984877.pdf"
name = "tika-984877.pdf"
Expand All @@ -608,6 +609,7 @@ def test_dict_read_from_stream(caplog):
)


@pytest.mark.external
def test_parse_content_stream_peek_percentage():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/985/985770.pdf"
name = "tika-985770.pdf"
Expand All @@ -617,6 +619,7 @@ def test_parse_content_stream_peek_percentage():
page.extract_text()


@pytest.mark.external
def test_read_inline_image_no_has_q():
# pdf/df7e1add3156af17a372bc165e47a244.pdf
url = "https://corpora.tika.apache.org/base/docs/govdocs1/998/998719.pdf"
Expand All @@ -627,6 +630,7 @@ def test_read_inline_image_no_has_q():
page.extract_text()


@pytest.mark.external
def test_read_inline_image_loc_neg_1():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/935/935066.pdf"
name = "tika-935066.pdf"
Expand All @@ -636,6 +640,8 @@ def test_read_inline_image_loc_neg_1():
page.extract_text()


@pytest.mark.slow
@pytest.mark.external
def test_text_string_write_to_stream():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924562.pdf"
name = "tika-924562.pdf"
Expand All @@ -645,6 +651,7 @@ def test_text_string_write_to_stream():
page.compress_content_streams()


@pytest.mark.external
def test_name_object_read_from_stream_unicode_error(): # L588
url = "https://corpora.tika.apache.org/base/docs/govdocs1/974/974966.pdf"
name = "tika-974966.pdf"
Expand All @@ -654,6 +661,7 @@ def test_name_object_read_from_stream_unicode_error(): # L588
page.extract_text()


@pytest.mark.external
def test_bool_repr(tmp_path):
url = "https://corpora.tika.apache.org/base/docs/govdocs1/932/932449.pdf"
name = "tika-932449.pdf"
Expand All @@ -673,6 +681,7 @@ def test_bool_repr(tmp_path):
)


@pytest.mark.external
@patch("PyPDF2._reader.logger_warning")
def test_issue_997(mock_logger_warning):
url = "https://github.com/py-pdf/PyPDF2/files/8908874/Exhibit_A-2_930_Enterprise_Zone_Tax_Credits_final.pdf"
Expand Down
15 changes: 15 additions & 0 deletions tests/test_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def test_merge_write_closed_fh():
assert exc.value.args[0] == err_closed


@pytest.mark.external
def test_trim_outline_list():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/995/995175.pdf"
name = "tika-995175.pdf"
Expand All @@ -227,6 +228,7 @@ def test_trim_outline_list():
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
def test_zoom():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/994/994759.pdf"
name = "tika-994759.pdf"
Expand All @@ -240,6 +242,7 @@ def test_zoom():
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
def test_zoom_xyz_no_left():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/933/933322.pdf"
name = "tika-933322.pdf"
Expand All @@ -253,6 +256,7 @@ def test_zoom_xyz_no_left():
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
def test_outline_item():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/997/997511.pdf"
name = "tika-997511.pdf"
Expand All @@ -266,6 +270,8 @@ def test_outline_item():
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
@pytest.mark.slow
def test_trim_outline():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/982/982336.pdf"
name = "tika-982336.pdf"
Expand All @@ -279,6 +285,8 @@ def test_trim_outline():
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
@pytest.mark.slow
def test1():
url = "https://corpora.tika.apache.org/base/docs/govdocs1/923/923621.pdf"
name = "tika-923621.pdf"
Expand All @@ -292,6 +300,8 @@ def test1():
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
@pytest.mark.slow
def test_sweep_recursion1():
# TODO: This test looks like an infinite loop.
url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf"
Expand All @@ -309,6 +319,8 @@ def test_sweep_recursion1():
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
@pytest.mark.slow
@pytest.mark.parametrize(
("url", "name"),
[
Expand Down Expand Up @@ -337,6 +349,7 @@ def test_sweep_recursion2(url, name):
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
def test_sweep_indirect_list_newobj_is_None(caplog):
url = "https://corpora.tika.apache.org/base/docs/govdocs1/906/906769.pdf"
name = "tika-906769.pdf"
Expand All @@ -354,6 +367,7 @@ def test_sweep_indirect_list_newobj_is_None(caplog):
os.remove("tmp-merger-do-not-commit.pdf")


@pytest.mark.external
def test_iss1145():
# issue with FitH destination with null param
url = "https://github.com/py-pdf/PyPDF2/files/9164743/file-0.pdf"
Expand Down Expand Up @@ -382,6 +396,7 @@ def test_deprecate_bookmark_decorator_output():
assert merger.outline[0].title == first_oi_title


@pytest.mark.external
def test_iss1344(caplog):
url = "https://github.com/py-pdf/PyPDF2/files/9549001/input.pdf"
name = "iss1344.pdf"
Expand Down
Loading

0 comments on commit 4b8d849

Please sign in to comment.