From 012709f7a145d1fd85c3f6ca2a8e4c792d14e94b Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Fri, 15 Apr 2022 12:29:18 +0200 Subject: [PATCH] TST: Increase Test coverage (#756) Adding unit Tests: * xmp * ConvertFunctionsToVirtualList * PyPDF2.utils.hexStr * Page operations with encoded file * merging encrypted * images DOC: Comments to docstrings STY: Remove vim comments BUG: CCITTFaxDecode decodeParms can be an ArrayObject. I don't know how a good solution would look like. Now it doesn't throw an error, but the result might be wrong. BUG: struct was not imported for Python 2.X --- PyPDF2/filters.py | 12 +++- PyPDF2/generic.py | 49 ++++++------- PyPDF2/merger.py | 2 - PyPDF2/pdf.py | 9 +-- PyPDF2/utils.py | 7 +- Resources/imagemagick-ASCII85Decode.pdf | Bin 0 -> 2848 bytes Resources/imagemagick-CCITTFaxDecode.pdf | Bin 0 -> 1880 bytes Resources/imagemagick-images.pdf | Bin 0 -> 16012 bytes Resources/imagemagick-lzw.pdf | Bin 0 -> 2678 bytes Resources/metadata.pdf | Bin 0 -> 13294 bytes Tests/test_basic_features.py | 34 ++++----- Tests/test_javascript.py | 36 ++++++---- Tests/test_merger.py | 21 +++--- Tests/test_page.py | 51 ++++++++++++- Tests/test_pagerange.py | 12 ++++ Tests/test_reader.py | 87 ++++++++++++++++------- Tests/test_utils.py | 26 ++++++- Tests/test_workflows.py | 3 +- Tests/test_writer.py | 31 +++++++- Tests/test_xmp.py | 35 +++++++-- 20 files changed, 296 insertions(+), 119 deletions(-) create mode 100644 Resources/imagemagick-ASCII85Decode.pdf create mode 100644 Resources/imagemagick-CCITTFaxDecode.pdf create mode 100644 Resources/imagemagick-images.pdf create mode 100644 Resources/imagemagick-lzw.pdf create mode 100644 Resources/metadata.pdf diff --git a/PyPDF2/filters.py b/PyPDF2/filters.py index 1ecce31c8..8b72f0932 100644 --- a/PyPDF2/filters.py +++ b/PyPDF2/filters.py @@ -1,5 +1,3 @@ -# vim: sw=4:expandtab:foldmethod=marker -# # Copyright (c) 2006, Mathieu Fenniak # All rights reserved. # @@ -40,7 +38,7 @@ from cStringIO import StringIO else: from io import StringIO - import struct +import struct try: import zlib @@ -356,6 +354,10 @@ def decode(data, decodeParms=None): class CCITTFaxDecode(object): def decode(data, decodeParms=None, height=0): if decodeParms: + from PyPDF2.generic import ArrayObject + if isinstance(decodeParms, ArrayObject): + if len(decodeParms) == 1: + decodeParms = decodeParms[0] if decodeParms.get("/K", 1) == -1: CCITTgroup = 4 else: @@ -451,6 +453,10 @@ def _xobj_to_image(x_object_obj): img_byte_arr = io.BytesIO() img.save(img_byte_arr, format="PNG") data = img_byte_arr.getvalue() + elif x_object_obj["/Filter"] in (["/LZWDecode"], ['/ASCII85Decode'], ['/CCITTFaxDecode']): + from PyPDF2.utils import b_ + extension = ".png" + data = b_(data) elif x_object_obj["/Filter"] == "/DCTDecode": extension = ".jpg" elif x_object_obj["/Filter"] == "/JPXDecode": diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 334d76609..2ae9a7471 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -44,6 +44,8 @@ import decimal import codecs +from PyPDF2.utils import ERR_STREAM_TRUNCATED_PREMATURELY + ObjectPrefix = b_('/<[tf(n%') NumberSigns = b_('+-') IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]")) @@ -199,8 +201,7 @@ def readFromStream(stream, pdf): while True: tok = stream.read(1) if not tok: - # stream has truncated prematurely - raise PdfStreamError("Stream has ended unexpectedly") + raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY) if tok.isspace(): break idnum += tok @@ -208,8 +209,7 @@ def readFromStream(stream, pdf): while True: tok = stream.read(1) if not tok: - # stream has truncated prematurely - raise PdfStreamError("Stream has ended unexpectedly") + raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY) if tok.isspace(): if not generation: continue @@ -273,10 +273,11 @@ def readFromStream(stream): readFromStream = staticmethod(readFromStream) -## -# Given a string (either a "str" or "unicode"), create a ByteStringObject or a -# TextStringObject to represent the string. def createStringObject(string): + """ + Given a string (either a "str" or "unicode"), create a ByteStringObject or a + TextStringObject to represent the string. + """ if isinstance(string, utils.string_type): return TextStringObject(string) elif isinstance(string, utils.bytes_type): @@ -306,8 +307,7 @@ def readHexStringFromStream(stream): while True: tok = readNonWhitespace(stream) if not tok: - # stream has truncated prematurely - raise PdfStreamError("Stream has ended unexpectedly") + raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY) if tok == b_(">"): break x += tok @@ -328,8 +328,7 @@ def readStringFromStream(stream): while True: tok = stream.read(1) if not tok: - # stream has truncated prematurely - raise PdfStreamError("Stream has ended unexpectedly") + raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY) if tok == b_("("): parens += 1 elif tok == b_(")"): @@ -392,16 +391,17 @@ def readStringFromStream(stream): return createStringObject(txt) -## -# Represents a string object where the text encoding could not be determined. -# This occurs quite often, as the PDF spec doesn't provide an alternate way to -# represent strings -- for example, the encryption data stored in files (like -# /O) is clearly not text, but is still stored in a "String" object. class ByteStringObject(utils.bytes_type, PdfObject): + """ + Represents a string object where the text encoding could not be determined. + This occurs quite often, as the PDF spec doesn't provide an alternate way to + represent strings -- for example, the encryption data stored in files (like + /O) is clearly not text, but is still stored in a "String" object. + """ ## # For compatibility with TextStringObject.original_bytes. This method - # returns self. + # self. original_bytes = property(lambda self: self) def writeToStream(self, stream, encryption_key): @@ -413,12 +413,14 @@ def writeToStream(self, stream, encryption_key): stream.write(b_(">")) -## -# Represents a string object that has been decoded into a real unicode string. -# If read from a PDF document, this string appeared to match the -# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to -# occur. class TextStringObject(utils.string_type, PdfObject): + """ + Represents a string object that has been decoded into a real unicode string. + If read from a PDF document, this string appeared to match the + PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to + occur. + """ + autodetect_pdfdocencoding = False autodetect_utf16 = False @@ -569,8 +571,7 @@ def readFromStream(stream, pdf): skipOverComment(stream) continue if not tok: - # stream has truncated prematurely - raise PdfStreamError("Stream has ended unexpectedly") + raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY) if debug: print(("Tok:", tok)) if tok == b_(">"): diff --git a/PyPDF2/merger.py b/PyPDF2/merger.py index 854d8cdb8..d5fd22414 100644 --- a/PyPDF2/merger.py +++ b/PyPDF2/merger.py @@ -1,5 +1,3 @@ -# vim: sw=4:expandtab:foldmethod=marker -# # Copyright (c) 2006, Mathieu Fenniak # All rights reserved. # diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py index 0f7692bf5..9bfa1bd0d 100644 --- a/PyPDF2/pdf.py +++ b/PyPDF2/pdf.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- # -# vim: sw=4:expandtab:foldmethod=marker -# # Copyright (c) 2006, Mathieu Fenniak # Copyright (c) 2007, Ashish Kulkarni # @@ -1637,7 +1635,7 @@ def _getObjectFromStream(self, indirectReference): streamData.seek(0, 0) lines = streamData.readlines() for i in range(0, len(lines)): - print((lines[i])) + print(lines[i]) streamData.seek(pos, 0) try: obj = readObject(streamData, self) @@ -2588,11 +2586,6 @@ def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expan ctm[1][0], ctm[1][1], ctm[2][0], ctm[2][1]], expand) - ## - # Applys a transformation matrix the page. - # - # @param ctm A 6 elements tuple containing the operands of the - # transformation matrix def addTransformation(self, ctm): """ Applies a transformation matrix to the page. diff --git a/PyPDF2/utils.py b/PyPDF2/utils.py index 3270d86f8..87b3a8b24 100644 --- a/PyPDF2/utils.py +++ b/PyPDF2/utils.py @@ -39,7 +39,7 @@ except ImportError: # Py3 import builtins - +ERR_STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly" xrange_fn = getattr(builtins, "xrange", range) _basestring = getattr(builtins, "basestring", str) @@ -122,7 +122,7 @@ def skipOverComment(stream): def readUntilRegex(stream, regex, ignore_eof=False): """ Reads until the regular expression pattern matched (ignore the match) - Raise PdfStreamError on premature end-of-file. + :raises PdfStreamError: on premature end-of-file :param bool ignore_eof: If true, ignore end-of-line and return immediately """ name = b_('') @@ -133,7 +133,7 @@ def readUntilRegex(stream, regex, ignore_eof=False): if ignore_eof: return name else: - raise PdfStreamError("Stream has ended unexpectedly") + raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY) m = regex.search(tok) if m is not None: name += tok[:m.start()] @@ -242,7 +242,6 @@ def b_(s): bc[s] = r return r except Exception: - print(s) r = s.encode('utf-8') if len(s) < 2: bc[s] = r diff --git a/Resources/imagemagick-ASCII85Decode.pdf b/Resources/imagemagick-ASCII85Decode.pdf new file mode 100644 index 0000000000000000000000000000000000000000..46aabc0fc4aa9125f7bea9a67d748712d4663ba1 GIT binary patch literal 2848 zcmc&$TUX*p5YBV|if$AI6?4D9Aj4g8Mq$JmXLZ~$Bprwf(Gd1HKI~85-KvlP1J2W) zJ&VUqS9Mi&)%T^llI=Cy1^LjVM5YoA2KS^~CSuPX@+nn7b3w$*A5WZ$4Lh* zJGkdg1am5O#uByS@Q{w!3j?RmBWf%(XWaGedhkMTB+#jcmKG0~OBg)8y%0paf|?sD0z*{1i|`%kvRsf-l@ zWOYB9_ITI`#!tZnd9;X_U{s7PmKeRR{1{`~?DaJa%lLp_qWI-=VZsLI3NsvR^EUevqGd~C@4WRt|bOu>@*s6d&*rZ*lVtCqUeNS~GJ-FbzK#Bvu~-&kHf_StsRI%kE`;%9bn_Vo%1lLzeJyK~J7R-w!Y z7y`<_2=;CsEPsAoX8Xs_Eq0)^8yHby2c!Ew+pj#-FjxuLA>J9}vmgNRZ|5%5J5cU@ zz)F0S?MCKzZt1Xi{(U0B-x9+MBsObOk)-@&wV zoGQ0ZA^PrzJ6L%bwf6U%XRs`*K$t-W07t%$z;zTDzFosmm4VesCN?|%csuDJw;tihaz^;MXcmTw!zU?bLW1~+cL_;BE1GBs&&23Om|e|s;<^V@!T z2v?2_p!9!TOmSN4H&faqe!A@Cv{9_yVQ;ntIqeIpmgxi5NK{(8>bhww0dqkp&M5M)!8oI9P)T0X> zUgh|slci3Fg4c910<)%5C3r8^b;ZqinzZSwX+X)EuVw%xYdX^aLf3Whp|Y;CzW>URBe65_0*Y1O zN5VxAzz4*9-f7aCvf>$-t*|ufFmQ-oLO_i&bBAKRZ7>;QVbXze!*BW@m jf$2#aA83|mY8CqLL)^~WupLJ6tEDJPkz}*2ZkzlA|5?lD literal 0 HcmV?d00001 diff --git a/Resources/imagemagick-CCITTFaxDecode.pdf b/Resources/imagemagick-CCITTFaxDecode.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e5cbe2043b0cb89540d2c2d746dc5eecdbcc1520 GIT binary patch literal 1880 zcmd5-&5qkP5Z0k5o_px60~A{%hh38TwS$FGtX(HYnr7=Y&;W}ZD2ehaYl*HzH%^d; z>3j5$7s$)xUL>87C9h*`4@EDvV2hZUZ#ews8~WsOGCh@N9xw?4O#F(SpEG``mbwNP zEpx^fZ`K<4SXs3)OEwxYU8WRB?hdevX<&>$&C?pL!6P`lWPEHkr3HBaC08Fx`&Jr5 z+l}YCHk&HJN~oq6&*N7*vA704D+COh^jHqRtW0vHErff^Go9w@!ffGMpv^lQ_{|U1 zQknJr4pUm(y9S@K;HhsmMGTTynIBY6-mdbWbh)%Q5Og86t+XoGZ>-sHzcVe8f;B}? z4Eu!CQPHi;PjSryn?WRvk}zOuT)w(;?JwpdKncA^uLYHHwah0Q6buhf92a4LactW3I2e-POV?44tw6?`9Pk0 zk=vDq2cJGWxl=gGSvtCX=T73>I;YMUpP943XNvxS-0Rqh9ov!a`^s?=Te9Ui#Ia*Lli@L?{D9I4grT89J!f^V?n`!@ znEaDwB<-F(`#5Lsp6{I9y|umV;U<2y$}&9bXEV`Jrlp1P^+tyE94i2&kMRv07}r@} zD3XtiWrmq`>li&9M@B3*D-?NXHO6;!GM;01u_9Jy`2hVBW%?&$FMX_{(m$=pGRXKs znTd3sRcx(ek+*dmT2=>EV{CnTE;Esh!C-ON;S7|tw6MOgpDp$PqxEg?iH_>AVnJ8R z&uX-9Ea}Z=V*NVwTcUL9@nj^J+0X9s15I45N%RLtn9YorFMv*AL^)Q%RzO`N6R9Z6 zQ_*blV5uq3COtizA7QCVkR_MT>X8()j|mQd5Pp^y*ntEC{|Y-0XX=8H*xunRXeeIK z4vaD#0}LHlX@r=Fr5GOy*PdavFgQa-o5oyWSJ=BMnzM&8BV`xsgGYe%4e0yxEHnW# zA7TehF4Uqq>f#)uju~J7L^Mwy3ncQ4Z*wFC&q&zM_=1s~PUpe5K_9zL&nIIM#@CUK zW#Y;7Fzf4%#QHL+NZO>>G=%fw?8}%rTX%GPW_KVjs4F>^*R$}r`NCt7yxy)uPdW(T z+m?)jj(LTFfh31V@-!LBjAgR@(#TNOlmxn#z#wIe8vRPN7pt% z`(KL9TxK%5wIfkp-SWVk078&Inx4KOo93F#iYJ%{?MOw34x_C&`Heq&KXRuZ8 z#H@~4xpm(l(>w^N6*ZXe?ih-7ZNzLZX64p_ov~)j#%VSvYHQX*YUOg8?HF3OuMV>+ zX4`^{FcQsL`PKnNPT8{i2GiA;-3C8gN1L`q9EU{R3M1P)n!qgf&M_VBzP0P?4`LeY za7}If`Ar)z)qy{2xw^jPySaL?Gs)GoYn!>Ije)IPWB2xLAQ&Iz8u!F@a!p!O3&#K> zXnB&x))X77-@LPhYv|n9!8Hou5Vq*&8b?O^xrTLn+pw`_m>nk34dW?I0O~(n*C!4E zan;o*iLf)LU9$_-rGg~&dCcaMIc-}lPDkT4AbO$~r~dg zWA&arz}nSm5^FX>>npDwf|9)>9Sy5u`+&1Wgb7oaD5#^p9~IXnz|ZcT@YBk}tfs3? zrWH8f%O~*XT99`|cq{a?eg`o2sFq?FmzwLL+^%MevpgeJHtPCD3QbPKM;8Y!CROs6 z)}S3*&Zqt0i;!Sh__H=3v;~(1F>N^@9<*#2cz_Czsj(Sv8>4AvZ8pg?Q;4A!l1~?F zc~&UmICSCyTPUI`V6n(VTY=guN}37W3WohI;;m4!)uwlX#ikWC?72V0_=yJ*_D* zS8#hpHxss3tNFtGxwi&~Ui`RSxNPAqp$EylZwDI|e&gsH{JlT^qENW2qvrF0i$}Xx z-h8tcglUT?qNJLug^q*HgoVNa^%zSiOtYq9rh&o&bt@TP1%&}Iil|&c;R*^Hp`r;o zHWa27+gxB43V+u2L@dGt0FdDr%-EjL7M=D)v<1%=)QV<=Y#AQW=6cu|&EU|+XhZpP zz+{0HDIijmbq2r!D^Zm=Fj=J5YsQ!?8uq=2$x_Lb8<bD*Ta@fo9qyN-Ak4G}D3539+DAqW)zGnrYV5 z&orP}D!SnnXok3|0?m~GxwzYC*krmma534SnHp|$ft`1=Qp5}kpg#k%Vg|!Dvr;w~ zmQ26I#%K$MErgcMXwqV&TiK?PrW-Xop;%;PD!c=VWh-KyNq8(92E7QyO38K`(@u-c z(Hl@KQ#0#;;>k9e#mOL+wt?-6rLdO@grunX470s1~Amxf?ZKM%XR*C$~cy0fdhxA5kypTnN- zp5DHmaMz{|;3j0&{bR9IZjPUjR63vS>kP8nckE>6zD?$kh2$b~B~c@>+<04W?BweJN^b(dt$plG~F_I@u@Sg@sj`Tr!aFY7VX#HNO z6@^+!5`!A;BtudpLhBQp1K@#=rQ2LW6ey|c;vgXx94{0;s3m0Xw+K0Xq)_?-OF%CMX~^a>#ZpsP)PBILW@BV@%ew7q*0x#vNJlHR7|G4xmX zI|tF>O{3=5a;Rk*9F`nvnne~gveR&=m10a?aj3b9LtQy>avinP z#es{-=1^1j%UocVL;YF%CQ8xgvs~%2zDdOmskkA4uoX9?;)a|bH-wrYw01l%NTwU2 zl->DO%$;usYuQLpF(W|-t1Yab_BOdQ2$xuunoUO#u3B5t&TGW;gUTFKg+9G zs@A47du&~$Qp@fhYv%5;#i*4vv>GB2 z3U6by;pVgv&bkOKvl>Mmif&HegCeQ|2k>duhV)E;5AIFdE<*=O#@YZrT5L|T0eo8N zw73L(^g6VB?gzp0WB^I~q7PnamzoNWq^SXtYSSF~G!BC0nH&Vu`Eho3M%;xCtyObY zmnj0quFe#wtGS(%8DFNLEux6l)dxBbIujNO2k7}l3599aRLnF`IA9!xR4!*#HmoWr z?AoxRivt&v4TY)2HW%3WvnPQfOjsB^V|zkd#DUxFN!f5RV1|=6Mq4mxoj?Xml(#Y3 zP~JC?%#Z18-KPaOJHivBVWJ!Vc(SyYo=KT>IGwAF1{5p}UJF zD4sD3BCb9cpM)}@ddok z6^Q1MLoB0vF@WK@yFIV&CBge#NupQxlIYdFBzngz$*>As{Yt6`X39AhN%gJ;NkxCn z$tNq`vB-+IuB_pm8&_QocgNf`?|jGsuMXq@-d}gs4M?83RCv6T>Eu%cd?~<9^R87z zz;MOIC;B~gRRyp5I{8!uZ`8PH=$1QanvAz?-866AfaYBv0nMxXfQHvhob3fPj6>Zt zH2W@E0N-VG(0D)ZFGCBsbUiI@r;j` zymfimV-tB^4tVP-_+E>v4+Sr*x@jJp&GQNdy)Hft0ozGKIP}sywuu+~-Z2ZjS6)H# z>RteY;ORs1*n3_SJ(vPCZ(T4i9vz4>-WqnwCdwYXl_P?hh-=YAAgU((k{*?`get9LXWqnM@qSq(o2SkRFRB4QQ(N27 H6K4Jo4)34X literal 0 HcmV?d00001 diff --git a/Resources/imagemagick-lzw.pdf b/Resources/imagemagick-lzw.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b57e07f25e1c99df822100e7d44e04e6c1ac8fe9 GIT binary patch literal 2678 zcmcImOK%%h6rLUE3RDRRmgru`PV6SmydQRx)bG@39lLSbl%$~657Fl24uz1aRUzff7?-R#lyXi@l)cjEcYz6_5Gr5c>vVih$`P&YXAw zK673Wm}!Y)UMA2Hu{mmm5t~sWA|Bd(pcc?1V#Vu)JOJ9nC#NSR?)Ytv+?{xBXi+wZ z_=tO*F31@X16Tb9~+kv|+`+`uvlh z3KxID7*aSF>Jyp7Y2#eB0^ty=oH=ULVAXYPpdL)5@{4q_6s?_k0M7= z&)uAp>^bb#<0#k47f$s6#RiIc?)cQnqS%haf@(c@2)m87NGxCM-6T;oQOp;J(rpEE zId^R6{RQ#nd=NwN1Qu4#TTj(%*d*5{V&l?95l?5NToQL5rfzYMF)W>LhEE>hu8hkr zOCGJgWGQvuWAT!8$kGRyXKbtX;shF#TWsrhN@jZSLQc z*_Ki%Vn&H=buXK2bMGpT$+Ex>acfY|i~z*Hk~~r`KzaW$cA}gPt>Bk;+uCpf~DkQREvt?<1dr0~Mx zcIf$o5{{r`DXU0|B5ATN%bFr(z@g^1KWKxSu|0BNtS7r+csoTkPG-)+f(*XRX zSj_gQySuvvcZVPH&-v$_cW?cB zUcIjFQA?(bIlFtUs;=2&3L;{3jP$GkvWc1A1pq6M0cdYz1>oTUfMiVV%v~&i9BjaU z8~~7*rHzZJ6A&b3W9VWkVrpz}VhZ5n130@lnHt&xJeHgRn^W%0)hY}u6Tet0SyW&p zA(#-aqtQlWN3KU?qtS$v&~jM2m_JD?4YS11vM5Ed7z?+cA<)TQk02tVyNpI7UL!=) z0f!I-opj2HbikAhP=^7gb|(LD@Tc`oj*;=duw(=>vaoUdlQR>LlYy1_pFL(E!#|rq zMg|6EmcQZr&%wmhK&d3!V$vf_(XBGDNVxwwkQjCtW+ql)r^10m5)vd5QW9Y`E+jE7 z?0DC{?RQac`bX)k*1XR8&8PVDZHGlHHJW5)8FYe9V;?DLC~(6*3TUZDB|vm^c8>OS zc7{la7aAi&1-)YAP91(;Svkod5_&TPoCtDjJr+!ih8(Og3&=lA z=pK-VIxxBhfc;Pep6v!K+{Y~oFbJsomBb7C`%0e(OY}HP0h7qh4Fm$QWhHQ@{Yopt zLWS=bCa{8;0B;Oh;Srn*Scig`36RTbr#2SOhd5YRYUjdf1RTyF;0Ec(JO=$p`Luz-(8>!AXb%~!wpCr~Yrp|$vMp9k?^;}d2e!7|~Wn|a)6 z*u61}xYL?>@7*Jg+q-$(H8G6nPL6jV0|izU{r4%1&!DTl?3$6E^b=fzIlKj*U!mD< zKU$})i)wf`R2~T(>+%XjGWnh@7JBhHBs1*H>_UU_MebMk3W`Xkxo6P0wifG<9aW<5YOhDILH=OL(Y*ZTrv~v{l|NqmW5?_B&ll~{ho5dw$OAA2VXrV~V3ppl zpWl+&lfu%LuK;g`3(k&jUH)tE9|v$vLb%vZY{>9V6;RilUNpbXM7cVPxCGiM^JhfN z+K-jZZeySx(kZK|9YikY?<{{F`xbRn9gX?vDiPVwTeen0QD2!Q?dbHc=1AoKgQ3yvDYp0)b&amsv$Bvo%aPN~@y0e%3LKj0yCog5^ z(nxvileTLZ>-1CHgn0P`7tiEHWs#Y11vRG~b&mcujReeZs2qf1nUqOiQxqX?w;SMJ zqSYF=>9iKOsy6WQGuMK;lKb#)j2z1}w#DcxZZ}D!K)aj*H zn4Cl#V_*g|4XzU<3We#{90j3W=v9w+x1l$PgNcRThy0LO{pwJT?%;q02XyH~`htJC z_n6rOqcVeET3aS(G#=3EAk+P$>we}3ZF?&>A3d;cJGpis(qv>f@x&veKD{H0N(Z7O z4S~j#U#|>kkU({jts<$noxyvUOShw#c(#;Yj2F%bcjMTAGGfj>Q#xTQw=+7Qx3myv zioaG2fA|d$b%4=Qv!>ZhOOwFVz3w{K%vpXOnT!+uR@_ba6Hz-+26q_rwi*uG>SN99$8|SoUY8U<1c;_d` z3J^|Uqm|Zd1-9YDFFpF+X;WLej>(jIB$k5=p05&Ltd4a2^DcBl4!=Jzw#cz^w8UDldkZ91% z(CB9hw(1HnN=m~4Ej22WKJV{xgN_2opIO9rnP9-Lm9P2t|r&=rSykTI;*Hy zo%mXIc8oUUT$G8jgHz%?i8)UM%NZB`EZfS686ENhK4?F*Lgg5mWPaB7HJyr2z=p`L zFSAZOz>F_3%HX-~M5aSqI5^BX@>vFr%c~5GIWB}~SH#j+qbf##C6i(IewTWZ;-d?+xX`&e1#_fh&PL^|s6vO>YGuKcR1NQ~%j&btKuQ z1`&u`Rkca+#b7{@FLC3rzg{Sj%hqWX%474k({+QFmA&c!UB{|!3|ZK;PSi9eYsi>}UZJXEhU>IXDd3(HRT=K9^3pH#u6eJ3K` z30a$;7moNonHcS6V|&qaEvpPuuNTqe6Kx@2AnyyzI+fcIubam0M6VP{wyB?`>7yH7 zyBHfF;I%$lI@vsZ&=LgcRTJNz&NOFur&f zdb?qETmnidZ{AwI2Wy)j-_6u17|YkC36Q#iaTIM9q>anqp(+&S6cNhil9Sn4WU=1) z>POgCI&u483f71x=E(5{5Ul<*6V(>%YQN>I8ryQ_TD1Wpp7aadLF~&8!|3(qu3cWT zY)5p(12f~INRKR>O+@7j^wm}5_T{hr{R%3yi-3M99suM70t!W?xndh#g4OFW&hFxy zu9Vc27HsL8syoYEaH@4Yh0wgd$}A~SA8QlmLx#09!CAZ|p@5+m7aX`jE zhLnvuIr=)}m*FhhaFb72>n|6oQVJwch}$I%Q~k*hlX6 zXcaR}N2`!xf825?`J|HiM5`M}67rHP6{a9W&C&?xc6}Xeex((fOTobCKs~!S915r98A)U#x;rnhA13Vw>ZWPyLGP-1N9>uYepgO5ROqQa-0nIk#T zL`AkcmQeVaZ8EZmHgB;4NV+#L|Gf3`X10~|V$&4d&y$&i%LuvlZv!Yf2p*(;{^NFW zM*Mf(5WP%cdCEAdSryCbO4$y-A=qh4ke7=c%iX7!J|!wo<>s*9u8KM(j;2uPdnXT1 z(IhPb!04zrR3N^5T>LVKFiV?#qhPL}2x*rH$9&bCg683gx{XynnERdXVn_`MbegxZ z9q-GaZzGjZ(fsJyf;%EX6oF*dfzQ8Fqk(pAz}pr8 zHFy(xv@hX~-23B{qHl6P@rr;qeZ{jLywcZ2Dzfxy?y9jUt)F1LfGz(5k<{yMu=lD4 zf+UTHiE7)at}ta(R_dIUjxTRgp-JfbRUNLD6`i27DlHiEUJ(W>cIh)Vo4XK4`^&ru zfJ5Xo83o1mHfBjv2Ic4Z?{T4{{>!muh=rZVZ+A2mT1FyXQI~7wcN?MG$z6^0m~fFZ zgA@i5!&)K-Cw?{Bw)h7H-SaZoUuazBEKc-r`;p*t=wxm33HJBw8mB&{qpYw;I%7r$ znpSF5WOXdKr_HycC`r?;oVjN<97x+{B?{EQ36C?8%l+qZ(BiC}GQ$0kYChdO$6SPpxGtxQkf@FrPgkg1j7%5p)GV(9QlbIS8pz z{}rl;Q@>wC;#ApGO?Wp>u#P8B8CENANY!FkRE!$LI+cjksg$lAGPP^ZlW$s!i^+W9 zK3%`MzZ4@-Jy{yzj)+Z;8IW;oZ9-3_ zG0thrAcxg-(oCe)^)1)i9~nZ?Y8!f{EK$iizV7WDZeu;a63?YCQ^kY}z9tt$-;mS@ znN1@zm8|pUENlUTv&yuZL>B&R&Dzt|v1vMLF*0UTrQr=eLH(9J;M+M*>`-QNfa8^F zM3=%@=H=+?hetH;P{mi4iZ7RC&jdF_m+8_g&ec%Z7rhm~lA}_K=pi%=_>#abR6e6x zEeWB0DVxVb|3rPs`c{A!nH?NgXSpHS6NZV6&Te2{e7JgvhBj1VBTpThmJy28IHPc# zpS0ZlX%Rik(WPj#Hk4hyQb>fp?F~fw@}#v1*#=mW`$E8~?STz~4jrx+T!Mmw0PkCdA zgc7xC0tyF9U3(aRKr|y|e0%JzqF&WfGTYHo5;(8tJp=7EGKVe-v9^?C zmTaikAz_}!8pS(or_ogYmI#VGTfhept9kET7J(n%grLYYe?a-W0%BI55j?&ET#~yf=9j;OoYD2nyG%52{z|WJA9l^ul|IKFN*%u zx8FtG6E5jjN*v;(lSyby&6_CTP^A*i1z0W+9$}Je*-hl5F3qo$2q^2}_*c7$b7poF z!}A-*fIKk7mWe$av2A)~Ddmhef>(?Y4<<$ND47d%maN9X)192Y(h=m+H?YVH^WR!2 zy3l=;d8$8gSY=f%JbDhHeU1*-PPss0J=0Lp%7_KnZL4ok&cdv4rQtl54VpX`pEXkL z`b!k*N*pt{=#^Eh>?9@=Su}9JTmb}}*)GtRk(>uKn3a0VXX0I%SrnE}CSeBP6=oR{ zL8&;X1s{XsNSH%F*c2?mMBy><8;EjxTG-~Kf~{jxpM-}`0oGXYiVoqA_fD($>xld_ z-i(N@QDw?Del|@d)b=wMy1P6L+9`Zxkzyv^uP^(`3d9Mc_o3-DHI~DGDG1V2$HFVca$JRRE zLCENKAWRZkR{Yu2g?icHz?1(V>-+OuPGxk!Qeo=5Qm#3HnQZtWF(UN|o)Xm@cu-A- z2dsUlL<8F-gy&V~#(penL?R%^Ch@N0a>+og1z`|anlhkU`HRn{jC$J=JO@{%o8miE zR4QY5c4l`VR6YNF2!&^UuR64-!>RCu73-|oiSJr(u-N{cZ~IJKc(v-Gcn%4D2R?I6 zyk;}6t2js0acK95>$fBAYrqHOOi``+>6-js^#nz1AJocqM_TatKC2*07bq{*=gpry zr8Lo^SK3bK6wrASg-oxo-GrY)^+`F$dkxSxrt2-|9$^uG)@bY>vtlo8QMevYuv@Q8 z50`-=L2ZIug&1FS!vaJzuDdel%nBBB5H=aGX4Lk1*(v5o1+}VPU{LVn?^uJ+!JVGd zI6d89Lq+uVd`z3Hgy|Qft7{#Jf{_p1;Gk7uU7gcK-8B?$*%K_{N*#r*`-@6>DoQzNfRlUfOhzB*!$-8) z^6LWKYIf>-c`FM(iI=cr{4(SJzLNwB@f`jL-|~3-eUlw+RVR)#s?nBISRJA2gm);2 z`x{%6oOXZQt^WpAQ|k)Zx5frEsqnP=0)Z*&U~TX`38AXVI}K@6&GVGGHgIWp=(eS@ z7yypE-=&Ap+#2~O6*+RG{?j={`VTl0b{>mJc{8P9*cGkkv1G0%@WXeX)lFo)tsFfD zOD~I6s#Z(rJJw@!GS~*y%@EbyYnUfX7a3w3Z?ab;EOsoKy=$^4qOO)L@i{DzX5^M& ztT!F$E&_W9+k7%V`Fk;X72f(*EbFM)KkXnHMc9k?8Hi1+#-* zKxl@pu~O7?M|*qXo~}{VXZsmK3R5k~@$vPUyMe?Mo-shwmQlA*bg92P-yY&Le@mYi zCe3?P=2X93sxQ-T;`#NL#^@Azpl5czEhkTZDh`?5X#%}e)Y*ON3251HL(C^fyiDiR zBUA#jyt8av$=LE?A!=Ej4)kaX%Jk?#Y>?qb^^8PcL-rp@=k&FoW85nNcPby0n&;(! z7ip)Kkt5i~TsHqr$1$uBK)Tm*ucbpYDBpTSfm%$~H3sFYaB|4q?zm11XmdtWVd0Rk z8w3sFglT)@_%;~#WzaCiuf8STR%f2f%a`0%(f?MLf?s-0Ik^f&Qynnjh80JG$J)8( z-y?NToJbw6%V}q2CD|U?4%f>o-nE^&^qjLuIKVP$FVF;*sCeG>Fkc=zX!D?&C2l&s zl^l^v#7=0*>zRHJ@l5N$Z`XOPGMJ5St?H?*gZJC`GW=;WGdKkzA-pZ4H`GeHW?_1y|8rqSX4WhOFLplFo4bBHjsaA(|5vxDLyF_s z!*9pyg1?BoJih$MOfNwJMt>CMoy_s!6lDIIm#`c_Hb$vF&3cT$O0F<;j7tc!nhd#}jnyeB(*}h1*(6`nfa)>!T60&eK<{ z1S{52q+aEa!ObhiAvZ_-Y={WWMv^YuS_&3Y+n`voSgS8i1!m2vFP1&=Y6NNIoT<>w z!BBOJATChKNDWG#CKWMI?*lTe#iW>3O!d9-ROP;^x<#mkqZVuy*ZSDtu<>a!A$ifw zuZf_3@$SqP47gyY&wHtB_>P7SO`NY)2+)JBzKWZdpER1LK@Vt?GL+0+^_ZaE(-7$E zkuyDna9MP?Hl-a{U`i2l_HA0PIN#P>#@z`o_W45maP?>0?=;~J6B<;nW29+VbOGpI z71}ATN4+YZHc`0D;b&Y_HC87A$_pyB;e@tfJ4<0|DYILr(Va`AU!qG8D8@4@<#abI zI5BRFO0V5e4P1U7of4wp#IKhzsg}mzDpR7_D0A)3Tox;TeWEmc)WD4?)lTHFT)K~P z?Y0?eRZ1S*!UT^5R4ap*zIf(rJmJ#%_Xt{~LF{|j7wpt68zcCcK6&htT*QAhr-)+8 z=cgUAwCy?2Zr$zap`E#Yk{LbM+U_+)Sw(^%V!EGrfO4eSlWv-|N`$E06Q#1%>l`?$ zj0IIX=5x;IPOoa}+qk>9X(zLd&`Mu>$D?3kS?f)LuXzw9>z^@f_P0oC#xZm&=Q@=h zFkRE3QAOubxOh|0T9#4DT6GfOe+n4-4HmF_#flA?X@tnzV?uQm--3grXh!x!a|$%G zHWQ9to=BTy4tqp?pw~k7%yerkaT{*!CL*~lyqQCia8b#3m>r!HAZujrXdkqJ;Q-s1C#->_jJGvu_`DWufB4mXXWBg{ z;XHg?x)!;;#a6O_fXz6i&3AdHFh)DCiJ|Dhgf9R(jmcUywNWr~ahc3W9#jAb0Z9tjc6W+=Erh zOorDctGknJ#&;J@Q}fYyV0*{o01{C_3Xvgkj0o9Qf|#i$r*r9 z?eA-2n={%fX;>ZS?+wgLJn<|yvddXNr0bENFTGvG>0BZQd6||^GB@=I_$R|d*4{IQ>?wZ^T&nwo{gc-ul7r-xM}uH7v0EuKGv59F5h zSUL@gPG740PK4JF%x}dn)f~)HtmnRkOjApkNXM>ucYCvVejEc>JiPBmh26sLE@wo|yo(~(W)Yb5AW zZ60h>L?RaVIVk?}$Na|`u8`2MH?^hK5i<#Zt>{7|JMd>Ivhf3>Z@z-Vd4h-McuL+3 zlX8}dUUh-ePf{Wcsz{F?y(IoSyT0aQUGyCyDlGoxe?Go*@)~yaLd2w}5>D2J?N;h) zv$XtXsYW3GL@wfo7)S#>98gpBGF0{CjB3|&E>LP`fUqPpSlIY@AK1%q@*!6WU>|1^ zcy9LdnRnuzZfU>C^jm!=p#PcIpd~E1jP%~&2_s~lPcWG<@;Cn337!tSL9?o2qof&Ng^5|P)4fyhPelUCcB^g6ar_o>3(DdHw~k1 z!SP(rtQkmma2U+_{Gs!19pp*vJ6Qa?R>){*ZpXX8M&js0Z43VVOZZJ8tATQ%Ea zs_vRiJZAg8o=j|DQm^I zpLG0T?Kgwr2#}CLYz7LvRL=vdxpY18g883i)C6|&Rnk7SPtq>0ciNoi&?dOdv_y#C zB}f5(D#VHDQ}&NC*%sOUL|7cGlL3Yz6MCPF^juOmPae(b>hUB{gPW9j6a~i{ZVnoE zSnRC4zUZcRrFbL=qFD zEc9x0E7~d}pC_x)_{FnHJYeB)C{Da3?2erF=FA>1f{8pMIC zOe6-e1Oj13Y=f%H?gI@tg0iK}dWJJ;2b5-WSrmr&KUjebCTp%TK(Xd-WXa`tBx1hj26T^ptO4`$#ce zrjnrF)M2dK#v3fe93Y8(UCmU1u&kmf#oi?HwEI(e|$Wjb95hWH7 zMXA^!TZtNuz3S!h)nTbSAW5<@Vup+0hs zE8;So!&qBp5M=~t5vGsbxC)`sHzG{Ulom}!pNmA=T!+12xcQ#f)G}GHw%!3oAC!3P zz_537O5}t@3?-<+VkqjMpH9oWzs|jIjF`#H6GFzINEXKuGPb=jO^K2ZqwYg#SwcdeV53&o z92vcFr{=P_UBQP_MVJ{D5~cP4B|1b4vo_(@fR4;l_{p`CRIKdGz}q%@eSil^u^Vee zZP6<|GU7|2qJ7USWh3j!*8_*d-D$g{eV6Yi#4~jJksq=Iz&6rNOR?b zrXN{CP4lX>E1Iwau_7RFXN578)L10dR?+4# z==u<)6-Qib=wNF4ax3S0wM(!l+e^42((j=dyS%NjGCA`;9Rn|5a|F<^Uu?ZPx6ft;E{bK#c}kF zBUjVRX`PdP`atf2!Y#5Bu+s}qDSs^h(XTXjn_+Rxb;#eXg|G*0DT5+yXbZv%7J`+Z z+;>YjNA8v(5S*nS?mUMo-$a_>o%}kROmVXW8Tq+h2ozaUXRr>ZxAAIIUX&=*Q`iU`DKlnRrdlNXAT58L#bQK~`KTdGZHi{=OJ6a_YG)kD;Yo5HuKbBT zzz?&8EnyZLe2gMD!^z^=kJ|}SLEZ)a8;InaE)SwkAy!IAs$IRPLZFggA0o%hh!Y^2 z=&+}lT|Il0tW{;{Bi)M0L@L13CI=Ov0@f-_2V7^8|_ zs=^m)05$>lOR*eVV~t%wogLTYJnPZ(FV7Upm)<*GTCtNhzmRp4i?ekTue%*)g`tCeJ{t?wsQGtNGX-H>f9+7@tZQmY-o4K^RD13L8>VYTNq?hb3hDYlk1 z?-xRmnk-4b$+66D5JY%z9y1c>BYOnDaw&x@o~!2`BX0e%kQEMYp%p-NGV>6>Ww`w0 zBDO&1l<}a^(i)1CekUWb75?m6+PX^SQ*mm^4u;AAE_E(XPN?AwM(8jhK43$SHifD# z#A_2%ko!Pf{mUUeCK~3LxO@?0b(2XfW=(>tAzYmAowNTF!z|aKm05?${`z|wZq4Aj zMM!=RAjoVMA$g}}-td?2HdJ-#Cb zpkPX0@2WaqgX*Mum!PXEga*$sn~)4oBfp**7Oz3tRUt7BTPI#Oq=|ZG4fYJ$C*@+r zauhiGc`oyydAVvByjcZ7!h6t%4)C!dOq{Z%r7=R1h}q?9`fnE+HyU@J^S=-J_Ai9&9{B~hG~a+|eeUx8ts^(;w$ z>NpVCpZ>hI)gegIDk}eB_^so+>Co~WIrPV5hcq+rUCnT8)bHJ!5m%nPY@e=ic5jY7 zWQ1P2ATm;RD?IC0ZEPolwp`3Ku$ZI=OlU135%l+Ad)6)8E|B&M4M7Wv*hyreum;F^)`{; zBr2p85^tgyUF6k_yXeOV>o0K@Rn(H=j_isFb``^n6*_k>2<8neN?X;ESv?}gRf+hlk4P_fM;9a>i??7n{BfD)$DJ!L z9=h@>tG1)0K0C*w=}pf1sgk$O&IqTcm*rf9GTc14%4uHdyu4NOZKZQ&26v0p`n99y z27{$_OPSWp)om1(SCchA0(}==njd(JkyiX=gM(7?) zAC;$TRWWb|AuW9R6mwofv!imN9 zGPaR)`Rd6c(^LgiRaH-$c2rLFoAyfq?)nT)RqH?(l)a@%TlBiIE1^p7I{SoBYe5$y z&?PehR*bYeOp1yb>O(4PIC27e1##e`PJ{-2(-mWP&}kRKbr@`Z-l#TbTAsu?^plQhR;xvm`{-DVdTRy_iO3k zxZ6h9-XeS*pt85#fZG=g=79fzd`pUIAgObJItDS%Y_KnA7Q4+efTsne8`MP<75p3E zA!m)>@44N$J%NvQPMpq;(%BBw&)-fc9B~9}Nf=yi-X;aw;XdZ)S&w{1?K;O-l82I$ z;1zZd;vzxhog@S2NC@XZ;ZPLx%O;{8#PsS*zn_TTQGwwn@{8qzJ9Kd`<~Z!}q~3JS z%^Qya_}a~k?2C!=u~XkRDn^XQDDkmJR_A1i>$Y(FonGZdF7p&$vdMp6-;<20@SM7| z{^4?Pm1oW;&vofJRW+U<=kw!wWnU8H^P}d^z|y|t`_!H1RLFP&z3&gE)2l?d%tDz* z9@Ae(8IAgFPx;%nUEE&2Hzm&N%P^C?b*I_gsPi>@j|7CxBx;Ru-UI#Zh|zjO8=#A; zB8C6vm0|y1UKv4G7YlnQN*!i)wm%oU4g&*&>^m5~gVQ^>yo2RC*u8@e1IwR_fkEXR zEZ)KX9c0)F0Z|43bX!{ z(K}fEweSwc@8CiO68=*%?DD?_=kPvf@(yP2@Yn6%SpTy4%h2O}g7zK$$@Oo9f0OZV zBY#!?rr@vMzX|-E`>XdCY^XpAPWC3Q#-{(CYngX9|DFGTI&yy(_*a_#di&?x-;U?I zz~89;MdB}$zumt<{@=#p{O;nfpTDhtWy+QcByH;HZtrB`{BJkx@16PAWAT^CKS@i! z8~Z!**W%y!{*~Xqcjm8`|4RD*FWuJvp;hj8xqnjm`^^5!_rKBnPYhB4{@+K;(%w$w zPq8|XQiO|%fr*KMg^`tklbMNynTCOhf`NhJkDRQ%$^Rsx;$-OHU~2NG^4-wJ*%Sa$ zP!ZLj7jw0-F*3BX`%@85sbXR24E%FCn3$=UYS04JOr4znxCS!QGyd_&$;8IWOvgme z_NUaIg@K-l`HzCB22jS*%@p{D6(beU+QHDp!r9ai==KjqHhN}yW~%q<`hQftGyN|M zgNmnvDG(%YWTot43joOh8JXV8*`1tSfb9QY5j>EMjr~8~X+u=bJLP`iab>TK9%OW?Nqfk2xAz9IB1?(Caz1(I))fRks5KV{WFu8h*r1P7mlIt&(6h z0c?-_Bv!Jd%#iVPs^I_mc>e-pqDhM&B$X;gq7t@hcY|DTm?DW&_#Dnjmbqb_1`x*G zbf)aGOKVEehLJ8YXM(D$97htGV%jvhsWZjPcScJQXZF^h-mC8ob2R zM!=kBE(>)NJ2i5;BLT-a7?L-4c#il(P*dEU#=yyJ{w=oT))%?;s{}_sVby0QaqXmeJ z{SU7RbnM!XBc!fhPmD;eJ88b14Q=Ht9n;1?uPheYCt_6f^NCB6M%b?1?{OL;)0!vS zYU*mvG4%UoxTzJ8!A;<`S)FDxOz}RuY)%J}dZgB~vHW)Qw6E)obH*yOT&x*O0~4Jq zzj}pWj9L(GbX%Sy1pC}nbXHnjG`}O(=Dsc7c=O9At9G#eo73uT!VQyv-!dFe>P)Z2 z>1AKWl;n2*fr<<8<1s+&hjUiS#Rd;bhh?2k&Mz;lY?*luZb7g z*s9&9+TP9%&DEE$_P2FSlscN~=heIZV6;FU=l^DhrA zc2;%~b|GOA1{P5fF(JV}s|5?Qkf0zVn;0MPe-`=UBwWH)CvIa6jJD(bG}X z4v=7C1ZRXNf#44xh3pv-7(wd66Brp9sDvUN>nlQm&*A)>0w3OpUHfP8!m%|118W+E f;`pywa&|Fva`ABblNUxN&OeI+8JVcO7~uZ_rNo!0 literal 0 HcmV?d00001 diff --git a/Tests/test_basic_features.py b/Tests/test_basic_features.py index f3a41fe41..63b2a0a66 100644 --- a/Tests/test_basic_features.py +++ b/Tests/test_basic_features.py @@ -2,9 +2,9 @@ import pytest -from PyPDF2 import PdfFileWriter, PdfFileReader -from PyPDF2.utils import PdfReadError +from PyPDF2 import PdfFileReader, PdfFileWriter from PyPDF2.pdf import convertToInt +from PyPDF2.utils import PdfReadError TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) PROJECT_ROOT = os.path.dirname(TESTS_ROOT) @@ -12,50 +12,50 @@ def test_basic_features(): - output = PdfFileWriter() - document1 = os.path.join(RESOURCE_ROOT, "crazyones.pdf") - input1 = PdfFileReader(document1) + writer = PdfFileWriter() + pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") + reader = PdfFileReader(pdf_path) # print how many pages input1 has: - print("document1.pdf has %d pages." % input1.getNumPages()) + print("document1.pdf has %d pages." % reader.getNumPages()) # add page 1 from input1 to output document, unchanged - output.addPage(input1.getPage(0)) + writer.addPage(reader.getPage(0)) # add page 2 from input1, but rotated clockwise 90 degrees - output.addPage(input1.getPage(0).rotateClockwise(90)) + writer.addPage(reader.getPage(0).rotateClockwise(90)) # add page 3 from input1, rotated the other way: - output.addPage(input1.getPage(0).rotateCounterClockwise(90)) + writer.addPage(reader.getPage(0).rotateCounterClockwise(90)) # alt: output.addPage(input1.getPage(0).rotateClockwise(270)) # add page 4 from input1, but first add a watermark from another PDF: - page4 = input1.getPage(0) - watermark_pdf = document1 + page4 = reader.getPage(0) + watermark_pdf = pdf_path watermark = PdfFileReader(watermark_pdf) page4.mergePage(watermark.getPage(0)) - output.addPage(page4) + writer.addPage(page4) # add page 5 from input1, but crop it to half size: - page5 = input1.getPage(0) + page5 = reader.getPage(0) page5.mediaBox.upperRight = ( page5.mediaBox.getUpperRight_x() / 2, page5.mediaBox.getUpperRight_y() / 2, ) - output.addPage(page5) + writer.addPage(page5) # add some Javascript to launch the print window on opening this PDF. # the password dialog may prevent the print dialog from being shown, # comment the the encription lines, if that's the case, to try this out - output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});") + writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});") # encrypt your new PDF and add a password password = "secret" - output.encrypt(password) + writer.encrypt(password) # finally, write "output" to PyPDF2-output.pdf with open("PyPDF2-output.pdf", "wb") as outputStream: - output.write(outputStream) + writer.write(outputStream) def test_convertToInt(): diff --git a/Tests/test_javascript.py b/Tests/test_javascript.py index d49f4dc26..4048a76f0 100644 --- a/Tests/test_javascript.py +++ b/Tests/test_javascript.py @@ -1,4 +1,5 @@ import os + import pytest from PyPDF2 import PdfFileReader, PdfFileWriter @@ -8,21 +9,28 @@ PROJECT_ROOT = os.path.dirname(TESTS_ROOT) RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources") + @pytest.fixture def pdf_file_writer(): - ipdf = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf")) + reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf")) pdf_file_writer = PdfFileWriter() - pdf_file_writer.appendPagesFromReader(ipdf) + pdf_file_writer.appendPagesFromReader(reader) yield pdf_file_writer + def test_add_js(pdf_file_writer): - pdf_file_writer.addJS( - "this.print({bUI:true,bSilent:false,bShrinkToFit:true});" - ) + pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});") + + assert ( + "/Names" in pdf_file_writer._root_object + ), "addJS should add a name catalog in the root object." + assert ( + "/JavaScript" in pdf_file_writer._root_object["/Names"] + ), "addJS should add a JavaScript name tree under the name catalog." + assert ( + "/OpenAction" in pdf_file_writer._root_object + ), "addJS should add an OpenAction to the catalog." - assert "/Names" in pdf_file_writer._root_object, "addJS should add a name catalog in the root object." - assert "/JavaScript" in pdf_file_writer._root_object["/Names"], "addJS should add a JavaScript name tree under the name catalog." - assert "/OpenAction" in pdf_file_writer._root_object, "addJS should add an OpenAction to the catalog." def test_overwrite_js(pdf_file_writer): def get_javascript_name(): @@ -31,14 +39,12 @@ def get_javascript_name(): assert "/Names" in pdf_file_writer._root_object["/Names"]["/JavaScript"] return pdf_file_writer._root_object["/Names"]["/JavaScript"]["/Names"][0] - pdf_file_writer.addJS( - "this.print({bUI:true,bSilent:false,bShrinkToFit:true});" - ) + pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});") first_js = get_javascript_name() - pdf_file_writer.addJS( - "this.print({bUI:true,bSilent:false,bShrinkToFit:true});" - ) + pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});") second_js = get_javascript_name() - assert first_js != second_js, "addJS should overwrite the previous script in the catalog." + assert ( + first_js != second_js + ), "addJS should overwrite the previous script in the catalog." diff --git a/Tests/test_merger.py b/Tests/test_merger.py index 49048a741..959560d4a 100644 --- a/Tests/test_merger.py +++ b/Tests/test_merger.py @@ -14,6 +14,7 @@ def test_merge(): pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf") pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf") + pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf") file_merger = PyPDF2.PdfFileMerger() @@ -23,20 +24,24 @@ def test_merge(): file_merger.append(pdf_path, pages=PyPDF2.pagerange.PageRange(slice(0, 0))) file_merger.append(pdf_forms) - # PdfFileReader object: - file_merger.append(PyPDF2.PdfFileReader(pdf_path, "rb")) + # Merging an encrypted file + pdfr = PyPDF2.PdfFileReader(pdf_pw) + pdfr.decrypt("openpassword") + file_merger.append(pdfr) - # Is merging encrypted files broken? - # encrypted = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf") - # reader = PyPDF2.PdfFileReader(pdf_path, "rb") - # reader.decrypt("openpassword") - # file_merger.append(reader) + # PdfFileReader object: + file_merger.append(PyPDF2.PdfFileReader(pdf_path, "rb"), bookmark=True) # File handle with open(pdf_path, "rb") as fh: file_merger.append(fh) - file_merger.addBookmark("A bookmark", 0) + bookmark = file_merger.addBookmark("A bookmark", 0) + file_merger.addBookmark("deeper", 0, parent=bookmark) + file_merger.addMetadata({"author": "Martin Thoma"}) + file_merger.addNamedDestination("title", 0) + file_merger.setPageLayout("/SinglePage") + file_merger.setPageMode("/UseThumbs") file_merger.write("dont_commit_merged.pdf") file_merger.close() diff --git a/Tests/test_page.py b/Tests/test_page.py index 5b15f9f58..91c49bab1 100644 --- a/Tests/test_page.py +++ b/Tests/test_page.py @@ -1,5 +1,7 @@ import os +import pytest + from PyPDF2 import PdfFileReader TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) @@ -7,15 +9,40 @@ RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources") -def test_page_operations(): +@pytest.mark.parametrize( + "pdf_path, password", + [ + (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), None), + (os.path.join(RESOURCE_ROOT, "attachment.pdf"), None), + (os.path.join(RESOURCE_ROOT, "side-by-side-subfig.pdf"), None), + ( + os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf"), + "openpassword", + ), + (os.path.join(RESOURCE_ROOT, "imagemagick-images.pdf"), None), + (os.path.join(RESOURCE_ROOT, "imagemagick-lzw.pdf"), None), + ], + ids=[ + "crazyones", + "attachment", + "side-by-side-subfig", + "libreoffice-writer-password", + "imagemagick-images", + "imagemagick-lzw", + ], +) +def test_page_operations(pdf_path, password): """ This test just checks if the operation throws an exception. This should be done way more thoroughly: It should be checked if the output is as expected. """ - pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") reader = PdfFileReader(pdf_path) + + if password: + reader.decrypt(password) + page = reader.pages[0] page.mergeRotatedScaledPage(page, 90, 1, 1) page.mergeScaledTranslatedPage(page, 1, 1, 1) @@ -26,3 +53,23 @@ def test_page_operations(): page.scaleTo(100, 100) page.compressContentStreams() page.extractText() + + +@pytest.mark.parametrize( + "pdf_path, password", + [ + (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), None), + (os.path.join(RESOURCE_ROOT, "attachment.pdf"), None), + (os.path.join(RESOURCE_ROOT, "side-by-side-subfig.pdf"), None), + ( + os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf"), + "openpassword", + ), + ], +) +def test_compress_content_streams(pdf_path, password): + reader = PdfFileReader(pdf_path) + if password: + reader.decrypt(password) + for page in reader.pages: + page.compressContentStreams() diff --git a/Tests/test_pagerange.py b/Tests/test_pagerange.py index b213de5a0..05bd5fee0 100644 --- a/Tests/test_pagerange.py +++ b/Tests/test_pagerange.py @@ -3,6 +3,18 @@ from PyPDF2.pagerange import PageRange, ParseError, parse_filename_page_ranges +def test_equality(): + pr1 = PageRange(slice(0, 5)) + pr2 = PageRange(slice(0, 5)) + assert pr1 == pr2 + + +def test_equality_other_objectc(): + pr1 = PageRange(slice(0, 5)) + pr2 = "PageRange(slice(0, 5))" + assert pr1 != pr2 + + def test_idempotency(): pr = PageRange(slice(0, 5)) pr2 = PageRange(pr) diff --git a/Tests/test_reader.py b/Tests/test_reader.py index 6cf736d59..d4cbafb12 100644 --- a/Tests/test_reader.py +++ b/Tests/test_reader.py @@ -1,7 +1,10 @@ import io import os + import pytest + import PyPDF2.utils +from PyPDF2 import PdfFileReader from PyPDF2.filters import _xobj_to_image TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) @@ -9,16 +12,44 @@ RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources") -def test_read_metadata(): - with open(os.path.join(RESOURCE_ROOT, "crazyones.pdf"), "rb") as inputfile: - ipdf = PyPDF2.PdfFileReader(inputfile) - metadict = ipdf.getDocumentInfo() - assert metadict.title is None - assert dict(metadict) == { - "/CreationDate": "D:20150604133406-06'00'", - "/Creator": " XeTeX output 2015.06.04:1334", - "/Producer": "xdvipdfmx (20140317)", - } +@pytest.mark.parametrize( + "pdf_path, expected", + [ + ( + os.path.join(RESOURCE_ROOT, "crazyones.pdf"), + { + "/CreationDate": "D:20150604133406-06'00'", + "/Creator": " XeTeX output 2015.06.04:1334", + "/Producer": "xdvipdfmx (20140317)", + }, + ), + ( + os.path.join(RESOURCE_ROOT, "metadata.pdf"), + { + "/CreationDate": "D:20220415093243+02'00'", + "/ModDate": "D:20220415093243+02'00'", + "/Creator": "pdflatex, or other tool", + "/Producer": "Latex with hyperref, or other system", + "/Author": "Martin Thoma", + "/Keywords": "Some Keywords, other keywords; more keywords", + "/Subject": "The Subject", + "/Title": "The Title", + "/Trapped": "/False", + "/PTEX.Fullbanner": ( + "This is pdfTeX, Version " + "3.141592653-2.6-1.40.23 (TeX Live 2021) " + "kpathsea version 6.3.3" + ), + }, + ), + ], + ids=["crazyones", "metadata"], +) +def test_read_metadata(pdf_path, expected): + with open(pdf_path, "rb") as inputfile: + reader = PdfFileReader(inputfile) + metadict = reader.getDocumentInfo() + assert dict(metadict) == expected @pytest.mark.parametrize( @@ -29,16 +60,14 @@ def test_read_metadata(): ], ) def test_get_annotations(src): - reader = PyPDF2.PdfFileReader(src) + reader = PdfFileReader(src) for page in reader.pages: - print("/Annots" in page) if "/Annots" in page: for annot in page["/Annots"]: subtype = annot.getObject()["/Subtype"] if subtype == "/Text": - print(annot.getObject()["/Contents"]) - print("") + annot.getObject()["/Contents"] @pytest.mark.parametrize( @@ -49,7 +78,7 @@ def test_get_annotations(src): ], ) def test_get_attachments(src): - reader = PyPDF2.PdfFileReader(src) + reader = PdfFileReader(src) attachments = {} for i in range(reader.getNumPages()): @@ -71,7 +100,7 @@ def test_get_attachments(src): ], ) def test_get_outlines(src, outline_elements): - reader = PyPDF2.PdfFileReader(src) + reader = PdfFileReader(src) outlines = reader.getOutlines() assert len(outlines) == outline_elements @@ -79,13 +108,17 @@ def test_get_outlines(src, outline_elements): @pytest.mark.parametrize( "src,nb_images", [ - (os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf"), 0), - (os.path.join(RESOURCE_ROOT, "crazyones.pdf"), 0), - (os.path.join(RESOURCE_ROOT, "git.pdf"), 1), + ("pdflatex-outline.pdf", 0), + ("crazyones.pdf", 0), + ("git.pdf", 1), + ("imagemagick-lzw.pdf", 1), + ("imagemagick-ASCII85Decode.pdf", 1), + ("imagemagick-CCITTFaxDecode.pdf", 1), ], ) def test_get_images(src, nb_images): - reader = PyPDF2.PdfFileReader(src) + src =os.path.join(RESOURCE_ROOT, src) + reader = PdfFileReader(src) with pytest.raises(TypeError): page = reader.pages["0"] @@ -106,11 +139,13 @@ def test_get_images(src, nb_images): with open(filename, "wb") as img: img.write(byte_stream) images_extracted.append(filename) - else: - print("No image found.") assert len(images_extracted) == nb_images + # Cleanup + for filepath in images_extracted: + os.remove(filepath) + @pytest.mark.parametrize( "strict,with_prev_0,should_fail", @@ -153,10 +188,10 @@ def test_get_images_raw(strict, with_prev_0, should_fail): ) pdf_stream = io.BytesIO(pdf_data) if should_fail: - with pytest.raises(PyPDF2.pdf.utils.PdfReadError): - PyPDF2.PdfFileReader(pdf_stream, strict=strict) + with pytest.raises(PyPDF2.utils.PdfReadError): + PdfFileReader(pdf_stream, strict=strict) else: - PyPDF2.PdfFileReader(pdf_stream, strict=strict) + PdfFileReader(pdf_stream, strict=strict) @pytest.mark.xfail( @@ -167,5 +202,5 @@ def test_get_images_raw(strict, with_prev_0, should_fail): ) def test_issue297(): path = os.path.join(RESOURCE_ROOT, "issue-297.pdf") - reader = PyPDF2.PdfFileReader(path, "rb") + reader = PdfFileReader(path, "rb") reader.getPage(0) diff --git a/Tests/test_utils.py b/Tests/test_utils.py index a305dff7e..fb31edb9f 100644 --- a/Tests/test_utils.py +++ b/Tests/test_utils.py @@ -1,6 +1,14 @@ +import io +import os + import pytest + import PyPDF2.utils -import io +from PyPDF2 import PdfFileReader + +TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) +PROJECT_ROOT = os.path.dirname(TESTS_ROOT) +RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources") @pytest.mark.parametrize( @@ -10,6 +18,10 @@ def test_isInt(value, expected): assert PyPDF2.utils.isInt(value) == expected +def test_isBytes(): + assert PyPDF2.utils.isBytes(b"") + + @pytest.mark.parametrize( "stream,expected", [ @@ -73,3 +85,15 @@ def test_matrixMultiply(a, b, expected): def test_markLocation(): stream = io.BytesIO(b"abde" * 6000) PyPDF2.utils.markLocation(stream) + + +def test_ConvertFunctionsToVirtualList(): + pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") + reader = PdfFileReader(pdf_path) + + # Test if getting as slice throws an error + assert len(reader.pages[:]) == 1 + + +def test_hexStr(): + assert PyPDF2.utils.hexStr(10) == "0xa" diff --git a/Tests/test_workflows.py b/Tests/test_workflows.py index 9c8f17408..821fa7a49 100644 --- a/Tests/test_workflows.py +++ b/Tests/test_workflows.py @@ -1,8 +1,9 @@ # -*- coding: utf-8 -*- -import os import binascii +import os import sys + import pytest from PyPDF2 import PdfFileReader diff --git a/Tests/test_writer.py b/Tests/test_writer.py index bd8cd9d8c..8356c94bc 100644 --- a/Tests/test_writer.py +++ b/Tests/test_writer.py @@ -1,9 +1,10 @@ import os + import pytest from PyPDF2 import PdfFileReader, PdfFileWriter -from PyPDF2.utils import PageSizeNotDefinedError from PyPDF2.generic import RectangleObject +from PyPDF2.utils import PageSizeNotDefinedError TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) PROJECT_ROOT = os.path.dirname(TESTS_ROOT) @@ -80,3 +81,31 @@ def test_remove_images(): # Cleanup os.remove(tmp_filename) + + +def test_write_metadata(): + pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") + + reader = PdfFileReader(pdf_path) + writer = PdfFileWriter() + + for page in reader.pages: + writer.addPage(page) + + metadata = reader.getDocumentInfo() + writer.addMetadata(metadata) + + writer.addMetadata({"/Title": "The Crazy Ones"}) + + # finally, write data to PyPDF2-output.pdf + tmp_filename = "dont_commit_writer_added_metadata.pdf" + with open(tmp_filename, "wb") as output_stream: + writer.write(output_stream) + + # Check if the title was set + reader = PdfFileReader(tmp_filename) + metadata = reader.getDocumentInfo() + assert metadata.get("/Title") == "The Crazy Ones" + + # Cleanup + os.remove(tmp_filename) diff --git a/Tests/test_xmp.py b/Tests/test_xmp.py index 8fc7bf3a8..941f9d30d 100644 --- a/Tests/test_xmp.py +++ b/Tests/test_xmp.py @@ -1,6 +1,9 @@ import os + import pytest -import PyPDF2 + +import PyPDF2.xmp +from PyPDF2 import PdfFileReader TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) PROJECT_ROOT = os.path.dirname(TESTS_ROOT) @@ -15,9 +18,27 @@ ], ) def test_read_xmp(src, has_xmp): - with open(src, "rb") as inputfile: - ipdf = PyPDF2.PdfFileReader(inputfile) - xmp = ipdf.getXmpMetadata() - assert (xmp is None) == (not has_xmp) - if has_xmp: - print(xmp.xmp_createDate) + reader = PdfFileReader(src) + xmp = reader.getXmpMetadata() + assert (xmp is None) == (not has_xmp) + if has_xmp: + for el in xmp.getElement( + aboutUri="", namespace=PyPDF2.xmp.RDF_NAMESPACE, name="Artist" + ): + print("el={el}".format(el=el)) + + assert get_all_tiff(xmp) == {"tiff:Artist": ["me"]} + assert xmp.dc_contributor == [] + + +def get_all_tiff(xmp): + data = {} + tiff_ns = xmp.getNodesInNamespace( + aboutUri="", namespace="http://ns.adobe.com/tiff/1.0/" + ) + for tag in tiff_ns: + contents = [] + for content in tag.childNodes: + contents.append(content.data) + data[tag.tagName] = contents + return data