-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
TST: Add test for decrypting files (#661)
Also: Adjust ground truth "crazyones" example text
- Loading branch information
1 parent
02cc54b
commit 1cde559
Showing
4 changed files
with
73 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
TheCrazyOnesOctober14,1998Herestothecrazyones.Themis˝ts.Therebels.Thetroublemakers.Theroundpegsinthesquareholes.Theoneswhoseethingsdi˙erently.Theyrenotfondofrules.Andtheyhavenorespectforthestatusquo.Youcanquotethem,disagreewiththem,glorifyorvilifythem.Abouttheonlythingyoucantdoisignorethem.Becausetheychangethings.Theyinvent.Theyimagine.Theyheal.Theyexplore.Theycreate.Theyinspire.Theypushthehumanraceforward.Maybetheyhavetobecrazy.Howelsecanyoustareatanemptycanvasandseeaworkofart?Orsitinsilenceandhearasongthatsneverbeenwritten?Orgazeataredplanetandseealaboratoryonwheels?Wemaketoolsforthesekindsofpeople.Whilesomeseethemasthecrazyones,weseegenius.Becausethepeoplewhoarecrazyenoughtothinktheycanchangetheworld,aretheoneswhodo. | ||
The Cr azy Ones Octob er 14, 1998 Heres to the crazy ones. The mis˝ts. The reb els. The troublemak ers. The round p egs in the square holes. The ones who see things di˙eren tly . Theyre not fond of rules. And they ha v e no resp ect for the status quo. Y ou can quote them, disagree with them, glorify or vilify them. Ab out the only thing y ou cant do is ignore them. Because they c hange things. They in v en t. They imagine. They heal. They explore. They create. They inspire. They push the h uman race forw ard. Ma yb e they ha v e to b e crazy . Ho w else can y ou stare at an empt y can v as and see a w ork of art? Or sit in silence and hear a song thats nev er b een written? Or gaze at a red planet and see a lab oratory on wheels? W e mak e to ols for these kinds of p eople. While some see them as the crazy ones, w e see genius. Because the p eople who are crazy enough to think they can c hange the w orld, are the ones who do. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import os | ||
import binascii | ||
import sys | ||
|
||
from PyPDF2 import PdfFileReader, PdfFileWriter | ||
|
||
# Absolute path of the directory that contains this test module.
TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
# Parent directory of the tests directory.
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
# "Resources" directory under the project root; the tests in this module open
# their sample PDFs and ground-truth files from here.
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")

# Add the project root to the module search path.
# NOTE(review): this runs after the PyPDF2 import above, so it cannot influence
# how that import is resolved -- confirm the intended order.
sys.path.append(PROJECT_ROOT)
|
||
def test_PdfReaderFileLoad():
    """
    Check text extraction on the first page of crazyones.pdf.

    The extracted text, with newlines removed and encoded as UTF-8, must equal
    the raw bytes stored in crazyones.txt.
    """
    pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
    expected_path = os.path.join(RESOURCE_ROOT, "crazyones.txt")

    with open(pdf_path, "rb") as pdf_stream:
        reader = PdfFileReader(pdf_stream)
        first_page = reader.getPage(0)

        # The ground truth is read as bytes, which is why the extracted text
        # is encoded before the comparison.
        with open(expected_path, "rb") as expected_stream:
            expected = expected_stream.read()

        extracted = first_page.extractText()
        extracted = extracted.replace("\n", "").encode("utf-8")

        assert extracted == expected, (
            "PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n"
            % (expected, extracted)
        )
|
||
|
||
def test_PdfReaderJpegImage():
    """
    Check image extraction on the first page of jpeg.pdf.

    The data of the "/Im4" XObject is hex-encoded and must equal the text
    stored in jpeg.txt.
    """
    pdf_path = os.path.join(RESOURCE_ROOT, "jpeg.pdf")
    expected_path = os.path.join(RESOURCE_ROOT, "jpeg.txt")

    with open(pdf_path, "rb") as pdf_stream:
        reader = PdfFileReader(pdf_stream)

        # Expected hex dump of the image data.
        with open(expected_path, "r") as expected_stream:
            expected_hex = expected_stream.read()

        first_page = reader.getPage(0)
        x_objects = first_page["/Resources"]["/XObject"].getObject()
        image_bytes = x_objects["/Im4"].getData()

        # Hex-encode once; the same value is used for the comparison and for
        # the failure message.
        actual_hex = binascii.hexlify(image_bytes).decode()

        assert actual_hex == expected_hex, (
            "PDF extracted image differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n"
            % (expected_hex, actual_hex)
        )
|
||
|
||
def test_decrypt():
    """
    Test decrypting a password-protected file.

    Expected outcome: the file is reported as encrypted, decrypt() accepts the
    password, and the page count can be read afterwards.
    """
    with open(os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf"), "rb") as inputfile:
        ipdf = PdfFileReader(inputfile)
        assert ipdf.isEncrypted

        # decrypt() returns 0 when the password is rejected. Checking the
        # result makes the test fail if decryption did not actually succeed.
        decrypt_result = ipdf.decrypt("openpassword")
        assert decrypt_result != 0, "decrypt() rejected the password"

        assert ipdf.getNumPages() == 1
        # The flag is expected to stay set after decrypt().
        assert ipdf.isEncrypted

        # TODO: Is extractText() broken for encrypted files?
        # assert ipdf.getPage(0).extractText().replace('\n', '') == "\n˘\n\u02c7\u02c6˙\n\n\n˘\u02c7\u02c6˙\n\n"