diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml new file mode 100644 index 000000000..3564175e5 --- /dev/null +++ b/.github/workflows/benchmark.yaml @@ -0,0 +1,48 @@ +name: Benchmarking PyPDF2 +on: + push: + branches: + - main + +permissions: + contents: write + deployments: write + +jobs: + benchmark: + name: Run pytest-benchmark + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10"] + steps: + - name: Checkout Code + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install requirements (Python 3) + if: matrix.python-version != '2.7' + run: | + pip install -r requirements/ci.txt + - name: Install PyPDF2 + run: | + pip install . + - name: Run benchmark + run: | + pytest Tests/bench.py --benchmark-json output.json + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + name: Python Benchmark with pytest-benchmark + tool: 'pytest' + output-file-path: output.json + # Uses the workflow's GITHUB_TOKEN (not a personal access token); if pushes do not trigger a gh-pages build, see https://github.community/t/github-action-not-triggering-gh-pages-upon-push/16096 + github-token: ${{ secrets.GITHUB_TOKEN }} + auto-push: true + # Show alert with commit comment on detecting possible performance regression + alert-threshold: '200%' + comment-on-alert: true + fail-on-alert: true + alert-comment-cc-users: '@MartinThoma' \ No newline at end of file diff --git a/Tests/bench.py b/Tests/bench.py new file mode 100644 index 000000000..0c59be5ac --- /dev/null +++ b/Tests/bench.py @@ -0,0 +1,104 @@ +import os + +import PyPDF2 +from PyPDF2 import PdfFileReader +from PyPDF2.generic import Destination + +TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) +PROJECT_ROOT = os.path.dirname(TESTS_ROOT) +RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources") + + +def page_ops(pdf_path, password): + pdf_path = os.path.join(RESOURCE_ROOT, pdf_path) + + reader = 
PdfFileReader(pdf_path) + + if password: + reader.decrypt(password) + + page = reader.pages[0] + page.mergeRotatedScaledPage(page, 90, 1, 1) + page.mergeScaledTranslatedPage(page, 1, 1, 1) + page.mergeRotatedScaledTranslatedPage(page, 90, 1, 1, 1, 1) + page.addTransformation([1, 0, 0, 0, 0, 0]) + page.scale(2, 2) + page.scaleBy(0.5) + page.scaleTo(100, 100) + page.compressContentStreams() + page.extractText() + + +def test_page_operations(benchmark): + """ + Apply various page operations. + + Rotation, scaling, translation, content stream compression, text extraction + """ + benchmark(page_ops, "libreoffice-writer-password.pdf", "openpassword") + + +def merge(): + pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") + outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf") + pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf") + pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf") + + file_merger = PyPDF2.PdfFileMerger() + + # string path: + file_merger.append(pdf_path) + file_merger.append(outline) + file_merger.append(pdf_path, pages=PyPDF2.pagerange.PageRange(slice(0, 0))) + file_merger.append(pdf_forms) + + # Merging an encrypted file + pdfr = PyPDF2.PdfFileReader(pdf_pw) + pdfr.decrypt("openpassword") + file_merger.append(pdfr) + + # PdfFileReader object: + file_merger.append(PyPDF2.PdfFileReader(pdf_path, "rb"), bookmark=True) + + # File handle + with open(pdf_path, "rb") as fh: + file_merger.append(fh) + + bookmark = file_merger.addBookmark("A bookmark", 0) + file_merger.addBookmark("deeper", 0, parent=bookmark) + file_merger.addMetadata({"author": "Martin Thoma"}) + file_merger.addNamedDestination("title", 0) + file_merger.setPageLayout("/SinglePage") + file_merger.setPageMode("/UseThumbs") + + tmp_path = "dont_commit_merged.pdf" + file_merger.write(tmp_path) + file_merger.close() + + # Check if bookmarks are correct + pdfr = PyPDF2.PdfFileReader(tmp_path) + assert [el.title for el in pdfr.getOutlines() if isinstance(el, 
Destination)] == [ + "Foo", + "Bar", + "Baz", + "Foo", + "Bar", + "Baz", + "Foo", + "Bar", + "Baz", + "True", + "A bookmark", + ] + + # Clean up + os.remove(tmp_path) + + +def test_merge(benchmark): + """ + Merge PDFs from several source types and check the resulting outline. + + String paths, reader objects, file handles, bookmarks, metadata, named destinations + """ + benchmark(merge) diff --git a/docs/dev/intro.md b/docs/dev/intro.md new file mode 100644 index 000000000..95d0b4809 --- /dev/null +++ b/docs/dev/intro.md @@ -0,0 +1,16 @@ +# Developer Intro + +PyPDF2 is a library and hence its users are developers. This document is not for +the users, but for people who want to work on PyPDF2 itself. + +## Installing Requirements + +``` +pip install -r requirements/dev.txt +``` + +## Benchmarks + +We need to keep an eye on performance and thus we have a few benchmarks. + +See [py-pdf.github.io/PyPDF2/dev/bench](https://py-pdf.github.io/PyPDF2/dev/bench/) diff --git a/docs/index.rst b/docs/index.rst index f92aa5ef4..086bb2273 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,6 +47,11 @@ You can contribute to `PyPDF2 on Github `_. modules/Field modules/PageRange +.. toctree:: + :caption: PyPDF Developers + :maxdepth: 1 + + dev/intro .. 
toctree:: :caption: About PyPDF2 diff --git a/requirements/ci.in b/requirements/ci.in index a7592ecf5..0a6c2f57b 100644 --- a/requirements/ci.in +++ b/requirements/ci.in @@ -3,4 +3,5 @@ flake8 flake8_implicit_str_concat flake8-bugbear pillow -pytest \ No newline at end of file +pytest +pytest-benchmark \ No newline at end of file diff --git a/requirements/ci.txt b/requirements/ci.txt index fcca42ffe..e7e861572 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -38,6 +38,8 @@ pluggy==1.0.0 # via pytest py==1.11.0 # via pytest +py-cpuinfo==8.0.0 + # via pytest-benchmark pycodestyle==2.8.0 # via flake8 pyflakes==2.4.0 @@ -45,6 +47,10 @@ pyflakes==2.4.0 pyparsing==3.0.7 # via packaging pytest==7.0.1 + # via + # -r requirements/ci.in + # pytest-benchmark +pytest-benchmark==3.4.1 # via -r requirements/ci.in tomli==1.2.3 # via pytest