From abe19f761450756e6bc837a626f8b3afd71236e1 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 1 Nov 2022 13:38:46 +0100 Subject: [PATCH] DEV: Modify read_string_from_stream to a benchmark (#1415) The previous test was too brittle. We need to keep an eye on the benchmarks in the future, but also be careful when interpreting the numbers. Credits to mergezalot in PR #1413 --- tests/bench.py | 17 ++++++++++++++++- tests/test_generic.py | 14 -------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/bench.py b/tests/bench.py index 4ae9bb2d1..1caa18319 100644 --- a/tests/bench.py +++ b/tests/bench.py @@ -1,8 +1,9 @@ +from io import BytesIO from pathlib import Path import PyPDF2 from PyPDF2 import PdfReader, Transformation -from PyPDF2.generic import Destination +from PyPDF2.generic import Destination, read_string_from_stream TESTS_ROOT = Path(__file__).parent.resolve() PROJECT_ROOT = TESTS_ROOT.parent @@ -125,3 +126,17 @@ def text_extraction(pdf_path): def test_text_extraction(benchmark): file_path = SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf" benchmark(text_extraction, file_path) + + +def read_string_from_stream_performance(): + stream = BytesIO(b"(" + b"".join([b"x"] * 1024 * 256) + b")") + assert read_string_from_stream(stream) + + +def test_read_string_from_stream_performance(benchmark): + """ + This test simulates reading an embedded base64 image of 256kb. + It should be faster than a second, even on ancient machines. + Runs < 100ms on a 2019 notebook. Takes 10 seconds prior to #1350. 
+ """ + benchmark(read_string_from_stream_performance) diff --git a/tests/test_generic.py b/tests/test_generic.py index 0e1d11233..4b0465253 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1,5 +1,4 @@ import os -import time from io import BytesIO from pathlib import Path from unittest.mock import patch @@ -171,19 +170,6 @@ def test_readStringFromStream_excape_digit2(): assert read_string_from_stream(stream) == "hello \x01\x02\x03\x04" -def test_readStringFromStream_performance(): - """ - This test simulates reading an embedded base64 image of 256kb. - It should be faster than a second, even on ancient machines. - Runs < 100ms on a 2019 notebook. Takes 10 seconds prior to #1350. - """ - stream = BytesIO(b"(" + b"".join([b"x"] * 1024 * 256) + b")") - start = time.process_time() - assert read_string_from_stream(stream) - end = time.process_time() - assert end - start < 4, test_readStringFromStream_performance.__doc__ - - def test_NameObject(caplog): stream = BytesIO(b"x") with pytest.raises(PdfReadError) as exc: