From 0084bcbb35b43c826e98f1d3f472848480cbd3b4 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 17 Apr 2022 13:57:23 +0200 Subject: [PATCH 1/4] TST: Add tests for Writer --- .gitignore | 3 +- PyPDF2/pdf.py | 12 +++++++- Tests/test_pagerange.py | 8 ++++++ Tests/test_writer.py | 61 +++++++++++++++++++++++++++++++++++++++-- docs/index.rst | 1 + docs/user/forms.md | 24 ++++++++++++++++ 6 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 docs/user/forms.md diff --git a/.gitignore b/.gitignore index e11314331..860ab3e92 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,7 @@ coverage.xml docs/_build/ # Files generated by some of the scripts -dont_commit_merged.pdf -dont_commit_writer.pdf +dont_commit_*.pdf PyPDF2-output.pdf Image9.png PyPDF2_pdfLocation.txt diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py index 0fd5cdbc1..ec9b47235 100644 --- a/PyPDF2/pdf.py +++ b/PyPDF2/pdf.py @@ -712,7 +712,17 @@ def addBookmarkDict(self, bookmark, parent=None): return bookmarkRef - def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args): + def addBookmark( + self, + title, + pagenum, + parent=None, + color=None, + bold=False, + italic=False, + fit='/Fit', + *args + ): """ Add a bookmark to this PDF file. diff --git a/Tests/test_pagerange.py b/Tests/test_pagerange.py index b6c89dccb..b8d8fa825 100644 --- a/Tests/test_pagerange.py +++ b/Tests/test_pagerange.py @@ -16,6 +16,14 @@ def test_str(page_range, expected): assert str(PageRange(page_range)) == expected +@pytest.mark.parametrize( + "page_range,expected", + [(slice(0, 5), "PageRange('0:5')"), (slice(0, 5, 2), "PageRange('0:5:2')")], +) +def test_repr(page_range, expected): + assert repr(PageRange(page_range)) == expected + + def test_equality_other_objectc(): pr1 = PageRange(slice(0, 5)) pr2 = "PageRange(slice(0, 5))" diff --git a/Tests/test_writer.py b/Tests/test_writer.py index 357dc1a34..c4913fc2b 100644 --- a/Tests/test_writer.py +++ b/Tests/test_writer.py @@ -33,7 +33,6 @@ def test_writer_operations(): writer.removeText() writer.insertPage(reader_outline.pages[0], 0) writer.addBookmarkDestination(page) - writer.addBookmark("A bookmark", 0) # output.addNamedDestination("A named destination", 1) writer.removeLinks() # assert output.getNamedDestRoot() == ['A named destination', IndirectObject(9, 0, output)] @@ -122,7 +121,6 @@ def test_fill_form(): page = reader.pages[0] fields = reader.getFields() - print(fields) writer.addPage(page) @@ -132,3 +130,62 @@ def test_fill_form(): tmp_filename = "dont_commit_filled_pdf.pdf" with open(tmp_filename, "wb") as output_stream: writer.write(output_stream) + + +def test_encrypt(): + reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "form.pdf")) + writer = PdfFileWriter() + + page = reader.pages[0] + + writer.addPage(page) + writer.encrypt(user_pwd="userpwd", owner_pwd="ownerpwd", use_128bit=False) + + # write "output" to PyPDF2-output.pdf + tmp_filename = "dont_commit_encrypted.pdf" + with open(tmp_filename, "wb") as output_stream: + writer.write(output_stream) + + # Cleanup + os.remove(tmp_filename) + + +def test_add_bookmark(): + reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")) + writer = PdfFileWriter() + + for page in reader.pages: + writer.addPage(page) + + bookmark = writer.addBookmark( + "A bookmark", 1, None, (255, 0, 15), True, True, "/Fit", 200, 0, None + ) + writer.addBookmark("Another", 2, bookmark, None, False, False, "/Fit", 0, 0, None) + + # write "output" to PyPDF2-output.pdf + tmp_filename = "dont_commit_bookmark.pdf" + with open(tmp_filename, "wb") as output_stream: + writer.write(output_stream) + + # Cleanup + os.remove(tmp_filename) + + +def test_add_named_destination(): + reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")) + writer = PdfFileWriter() + + for page in reader.pages: + writer.addPage(page) + + from PyPDF2.pdf import NameObject + + writer.addNamedDestination(NameObject("A bookmark"), 2) + + # write "output" to PyPDF2-output.pdf + tmp_filename = "dont_commit_named_destination.pdf" + with open(tmp_filename, "wb") as output_stream: + writer.write(output_stream) + + # Cleanup + os.remove(tmp_filename) diff --git a/docs/index.rst b/docs/index.rst index e5cdcd238..b73115df5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -27,6 +27,7 @@ You can contribute to `PyPDF2 on Github `_. user/add-watermark user/reading-pdf-annotations user/adding-pdf-annotations + user/forms .. toctree:: diff --git a/docs/user/forms.md b/docs/user/forms.md new file mode 100644 index 000000000..e6d5c012c --- /dev/null +++ b/docs/user/forms.md @@ -0,0 +1,24 @@ +# Interactions with PDF Forms + +## Filling out forms + +```python +from PyPDF2 import PdfFileReader, PdfFileWriter + +reader = PdfFileReader("form.pdf") +writer = PdfFileWriter() + +page = reader.pages[0] +fields = reader.getFields() + +writer.addPage(page) + +writer.updatePageFormFieldValues( + writer.getPage(0), + {"fieldname": "some filled in text"} +) + +# write "output" to PyPDF2-output.pdf +with open("filled-out.pdf", "wb") as output_stream: + writer.write(output_stream) +``` From 0307e3a96b45871726814c38250746c80abdba8e Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 17 Apr 2022 14:38:45 +0200 Subject: [PATCH 2/4] More tests --- Tests/test_reader.py | 67 ++++++++++++++++++--- Tests/test_writer.py | 136 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 188 insertions(+), 15 deletions(-) diff --git a/Tests/test_reader.py b/Tests/test_reader.py index 289c3098a..26ff087bd 100644 --- a/Tests/test_reader.py +++ b/Tests/test_reader.py @@ -233,16 +233,69 @@ def test_get_page_of_encrypted_file(): @pytest.mark.parametrize( - "src,expected", + "src,expected,expected_method", [ - ("form.pdf", {"foo": ""}), - ("form_acrobatReader.pdf", {"foo": "Bar"}), - ("form_evince.pdf", {"foo": "bar"}), + ( + "form.pdf", + {"foo": ""}, + {"foo": {"/DV": "", "/FT": "/Tx", "/T": "foo", "/V": ""}}, + ), + ( + "form_acrobatReader.pdf", + {"foo": "Bar"}, + {"foo": {"/DV": "", "/FT": "/Tx", "/T": "foo", "/V": "Bar"}}, + ), + ( + "form_evince.pdf", + {"foo": "bar"}, + {"foo": {"/DV": "", "/FT": "/Tx", "/T": "foo", "/V": "bar"}}, + ), ], ) -def test_form(src, expected): +def test_get_form(src, expected, expected_method): """Check if we can read out form data.""" src = os.path.join(RESOURCE_ROOT, src) - pdf = PdfFileReader(src) - fields = pdf.getFormTextFields() + reader = PdfFileReader(src) + fields = reader.getFormTextFields() assert fields == expected + + fields = reader.getFields() + assert fields == expected_method + + +@pytest.mark.parametrize( + "src,page_nb", + [ + ("form.pdf", 0), + ("pdflatex-outline.pdf", 2), + ], +) +def test_get_page_number(src, page_nb): + src = os.path.join(RESOURCE_ROOT, src) + reader = PdfFileReader(src) + page = reader.pages[page_nb] + assert reader.getPageNumber(page) == page_nb + + +@pytest.mark.parametrize( + "src,expected", + [ + ("form.pdf", None), + ], +) +def test_get_page_layout(src, expected): + src = os.path.join(RESOURCE_ROOT, src) + reader = PdfFileReader(src) + assert reader.getPageLayout() == expected + + +@pytest.mark.parametrize( + "src,expected", + [ + ("form.pdf", "/UseNone"), + ], +) +def test_get_page_mode(src, expected): + src = os.path.join(RESOURCE_ROOT, src) + reader = PdfFileReader(src) + assert reader.getPageMode() == expected diff --git a/Tests/test_writer.py b/Tests/test_writer.py index c4913fc2b..5a750871d 100644 --- a/Tests/test_writer.py +++ b/Tests/test_writer.py @@ -30,10 +30,8 @@ def test_writer_operations(): writer.addBlankPage() assert exc.value.args == () writer.insertPage(page, 1) - writer.removeText() writer.insertPage(reader_outline.pages[0], 0) writer.addBookmarkDestination(page) - # output.addNamedDestination("A named destination", 1) writer.removeLinks() # assert output.getNamedDestRoot() == ['A named destination', IndirectObject(9, 0, output)] writer.addBlankPage() @@ -48,8 +46,8 @@ def test_writer_operations(): writer.insertBlankPage(width=100, height=100) writer.insertBlankPage() # without parameters - # This gives "KeyError: '/Contents'" - is that a bug? - # output.removeImages() + # TODO: This gives "KeyError: '/Contents'" - is that a bug? + # writer.removeImages() writer.addMetadata({"author": "Martin Thoma"}) @@ -64,15 +62,22 @@ def test_writer_operations(): os.remove(tmp_path) -def test_remove_images(): - pdf_path = os.path.join(RESOURCE_ROOT, "side-by-side-subfig.pdf") +@pytest.mark.parametrize( + "input_path,ignoreByteStringObject", + [ + ("side-by-side-subfig.pdf", False), + ("reportlab-inline-image.pdf", True), + ], +) +def test_remove_images(input_path, ignoreByteStringObject): + pdf_path = os.path.join(RESOURCE_ROOT, input_path) reader = PdfFileReader(pdf_path) writer = PdfFileWriter() page = reader.pages[0] writer.insertPage(page, 0) - writer.removeImages() + writer.removeImages(ignoreByteStringObject=ignoreByteStringObject) # finally, write "output" to PyPDF2-output.pdf tmp_filename = "dont_commit_writer_removed_image.pdf" @@ -81,7 +86,36 @@ def test_remove_images(): with open(tmp_filename, "rb") as input_stream: reader = PdfFileReader(input_stream) - assert "Lorem ipsum dolor sit amet" in reader.getPage(0).extractText() + if input_path == "side-by-side-subfig.pdf": + assert "Lorem ipsum dolor sit amet" in reader.getPage(0).extractText() + + # Cleanup + os.remove(tmp_filename) + + +@pytest.mark.parametrize( + "input_path,ignoreByteStringObject", + [ + ("side-by-side-subfig.pdf", False), + ("side-by-side-subfig.pdf", True), + ("reportlab-inline-image.pdf", False), + ("reportlab-inline-image.pdf", True), + ], +) +def test_remove_text(input_path, ignoreByteStringObject): + pdf_path = os.path.join(RESOURCE_ROOT, input_path) + + reader = PdfFileReader(pdf_path) + writer = PdfFileWriter() + + page = reader.pages[0] + writer.insertPage(page, 0) + writer.removeText(ignoreByteStringObject=ignoreByteStringObject) + + # finally, write "output" to PyPDF2-output.pdf + tmp_filename = "dont_commit_writer_removed_text.pdf" + with open(tmp_filename, "wb") as output_stream: + writer.write(output_stream) # Cleanup os.remove(tmp_filename) @@ -189,3 +223,89 @@ def test_add_named_destination(): # Cleanup os.remove(tmp_filename) + + +def test_add_uri(): + reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")) + writer = PdfFileWriter() + + for page in reader.pages: + writer.addPage(page) + + from PyPDF2.pdf import RectangleObject + + writer.addURI( + 1, + "http://www.example.com", + RectangleObject([0, 0, 100, 100]), + border=[1, 2, 3, [4]], + ) + writer.addURI( + 2, + "https://pypdf2.readthedocs.io/en/latest/", + RectangleObject([20, 30, 50, 80]), + border=[1, 2, 3], + ) + writer.addURI( + 3, + "https://pypdf2.readthedocs.io/en/latest/user/adding-pdf-annotations.html", + "[ 200 300 250 350 ]", + border=[0, 0, 0], + ) + writer.addURI( + 3, + "https://pypdf2.readthedocs.io/en/latest/user/adding-pdf-annotations.html", + [100, 200, 150, 250], + border=[0, 0, 0], + ) + + # write "output" to PyPDF2-output.pdf + tmp_filename = "dont_commit_uri.pdf" + with open(tmp_filename, "wb") as output_stream: + writer.write(output_stream) + + # Cleanup + os.remove(tmp_filename) + + +def test_add_link(): + reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")) + writer = PdfFileWriter() + + for page in reader.pages: + writer.addPage(page) + + from PyPDF2.pdf import RectangleObject + + writer.addLink( + 1, + 2, + RectangleObject([0, 0, 100, 100]), + border=[1, 2, 3, [4]], + fit="/Fit", + ) + writer.addLink(2, 3, RectangleObject([20, 30, 50, 80]), [1, 2, 3], "/FitH", None) + writer.addLink( + 3, + 0, + "[ 200 300 250 350 ]", + [0, 0, 0], + "/XYZ", + 0, + 0, + 2, + ) + writer.addLink( + 3, + 0, + [100, 200, 150, 250], + border=[0, 0, 0], + ) + + # write "output" to PyPDF2-output.pdf + tmp_filename = "dont_commit_link.pdf" + with open(tmp_filename, "wb") as output_stream: + writer.write(output_stream) + + # Cleanup + # os.remove(tmp_filename) From b94da4f28080d28dcbc6b8812b21fc5801d1c824 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 17 Apr 2022 14:40:45 +0200 Subject: [PATCH 3/4] DOC: Forms --- docs/user/forms.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/user/forms.md b/docs/user/forms.md index e6d5c012c..0b58d679e 100644 --- a/docs/user/forms.md +++ b/docs/user/forms.md @@ -1,5 +1,15 @@ # Interactions with PDF Forms +## Reading form fields + +```python +from PyPDF2 import PdfFileReader + +reader = PdfFileReader("form.pdf") +fields = reader.getFormTextFields() +fields == {"key": "value", "key2": "value2"} +``` + ## Filling out forms ```python From 55a6523682af750c3db2e4fbb73d8d0a9bb3a4cf Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Sun, 17 Apr 2022 14:41:51 +0200 Subject: [PATCH 4/4] Remove unused variable --- Tests/test_writer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Tests/test_writer.py b/Tests/test_writer.py index 5a750871d..3b80d1974 100644 --- a/Tests/test_writer.py +++ b/Tests/test_writer.py @@ -154,7 +154,6 @@ def test_fill_form(): writer = PdfFileWriter() page = reader.pages[0] - fields = reader.getFields() writer.addPage(page)