From e3f60c1ae6092d350ffb2080ad53e23a6731f634 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 24 Sep 2023 19:11:34 +0200 Subject: [PATCH] BUG: PDF size increases because of too high float writing precision (#2213) See #1910 address regression from #2203 --- pypdf/generic/_base.py | 7 +++++-- ..._Vicksburg_Sample_OCR-crazyones-merged.pdf | Bin 217836 -> 217096 bytes tests/test_generic.py | 2 ++ tests/test_writer.py | 5 ++++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 6c3e41647..a50bb5faf 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -379,6 +379,9 @@ def readFromStream( return IndirectObject.read_from_stream(stream, pdf) +FLOAT_WRITE_PRECISION = 8 # shall be min 5 digits max, allow user adj + + class FloatObject(float, PdfObject): def __new__( cls, value: Union[str, Any] = "0.0", context: Optional[Any] = None @@ -409,8 +412,8 @@ def clone( def myrepr(self) -> str: if self == 0: return "0.0" - nb = int(log10(abs(self))) - s = f"{self:.{max(1,16-nb)}f}".rstrip("0").rstrip(".") + nb = FLOAT_WRITE_PRECISION - int(log10(abs(self))) + s = f"{self:.{max(1,nb)}f}".rstrip("0").rstrip(".") return s def __repr__(self) -> str: diff --git a/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf b/resources/Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf index 365637b94af1e4f8540ab347785d68b5f6b3574c..0e9633ac16c138eeaa90d3cf13e9f7cd6e2c006d 100644 GIT binary patch delta 1454 zcmZuwJ&P1U5M{kXbmXGNk3*EclZ%9*tE;N33j`xCJki`h(L|0DlM75XFi=pBg$WMs z5(AG-l@0tG`X3Da5mwLaXOA;5GyQl~@73#`zZY8{zixeh`=!}AyEzoZIyyhQW1?CN z-@Z*rYnY)#z7Fkoi#B&o+>J$vcNEE5phAt)cPD=a1DYO=A6~t$3l6E)-D>$|buHRH zO>(F4XVs>V*3Iv4ySCUIk}cGAPow9R5r_AdO$b6wCWC>D`K)?%^zzM%*Hw__$LH(K zt?~YVn#1w#^!)tSmCNK1?A{Dmr&cgI;#kY#TdW+x%&aixe%P9$>sHCC@L*Q9O72MHA&2AH3eX(J zIkRoix$KxTem3#`c=F+RGWdBsx%6rB0HZjcFL&gs_5&kP>S6L9?DYA;#&Dz_?C`9& zP*B^F0wdpRs}U@!H68)0os$9+DQ!{!%C#e7F1%Af>=6a~A}L@`BvG_2UK19tT$+{; zftS3j?|l+ZrEn74fhDoyB~kmO^0hFnAz&XPE)oQ=Pzy5vIAX>1h7iP5N-TLS8wNGs z1rKeyK9<_3S4;z9^sR=?Kc57U0~h-V?3If10Sw@aiGkVUN~}2{!$K?v;pl1AV9p;{ zqL#vfT%H9Gj9P372nm+4N{T&$lI(yKX({RxU3HVqD93W|0(gZ&I9Lef&lkY%)AOer Q``4^VtoHUEJ>IYW0p_tN+yDRo delta 2236 zcma)7&1w`u5GMY{)p!uai%Lfb>SgTeuIlO*au76lN}j-rD1t~NPcVW9b8tnNgP4%!}W77V~)ppZcM4_Svm$5C1rdGwWc($hs)FNsY{Hh;#|Pkx9Rek zdnn6FL=Pn7MO^b`czL~))b)sX$Ne=IUDcEFM(GI z4An*T1}Rh_?`blnM-s(aj0ZG*93{oV!+J{8_K>7LmlKm&1kU54$Wu{0^8QZ;VO>z7 z1vNT`1WX{?r!(xfCjWRafxC9sqOpacGg*3)wx)&rV z*`xvyVv^2xNYGia0|eNsGI>n?|nBPxEMecCm8djCH35Xa4BllAn8a|A{c3_ z!J|h_;z|0c#1MqoX`T2F1QAumK*X$U1(NrmCZWJl3#2HWSHyCt^eg})`dak_n2@!t zoO~gW6Ll+*LK3pqVp8zBbq*lVly`MD{Q^d-HoLZY9}l-qZk(4aVX(ILaBE}m4= 0.95 +@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript") def test_watermarking_reportlab_rendering(tmp_path): """ This test is showing a rotated+mirrored watermark in pypdf==3.15.4.