Skip to content

Commit

Permalink
PyPDF2 and PyMuPDF update
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Jul 1, 2022
1 parent 2182369 commit f126d3c
Show file tree
Hide file tree
Showing 31 changed files with 1,139 additions and 396 deletions.
12 changes: 6 additions & 6 deletions README.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def get_text_extraction_score(doc: Document, library_name: str):
version=PyPDF2.__version__,
watermarking_function=pypdf2_watermarking,
license="BSD 3-Clause",
last_release_date="2022-06-14",
last_release_date="2022-06-30",
),
"pdfminer": Library(
"pdfminer.six",
Expand Down Expand Up @@ -490,7 +490,7 @@ def get_text_extraction_score(doc: Document, library_name: str):
watermarking_function=None,
dependencies="MuPDF",
license="GNU AFFERO GPL 3.0 / Commerical",
last_release_date="2022-05-05",
last_release_date="2022-06-27",
),
"pdftotext": Library(
"pdftotext",
Expand Down
138 changes: 69 additions & 69 deletions cache.json
Original file line number Diff line number Diff line change
Expand Up @@ -222,103 +222,103 @@
},
"pymupdf": {
"1601.03642": {
"read": 0.021236896514892578
"read": 0.023758411407470703
},
"1602.06541": {
"read": 0.04923844337463379
"read": 0.048156023025512695
},
"1707.09725": {
"read": 0.18419218063354492
"read": 0.19010353088378906
},
"2201.00021": {
"read": 0.0506289005279541
"read": 0.052664756774902344
},
"2201.00022": {
"read": 0.03642082214355469
"read": 0.03505825996398926
},
"2201.00029": {
"read": 0.023745298385620117
"read": 0.02772665023803711
},
"2201.00037": {
"read": 0.10241866111755371
"read": 0.09012722969055176
},
"2201.00069": {
"read": 0.04053354263305664
"read": 0.04471254348754883
},
"2201.00151": {
"read": 0.21468496322631836
"read": 0.17026329040527344
},
"2201.00178": {
"read": 0.043529510498046875
"read": 0.06370210647583008
},
"2201.00200": {
"read": 0.03151369094848633
"read": 0.02881169319152832
},
"2201.00201": {
"read": 0.03332996368408203
"read": 0.031729936599731445
},
"2201.00214": {
"read": 0.41216444969177246
"read": 0.3913586139678955
},
"GeoTopo-book": {
"read": 0.24680590629577637
"read": 0.275968074798584
}
},
"pypdf2": {
"1601.03642": {
"read": 0.13181662559509277
"read": 0.11479783058166504
},
"1602.06541": {
"read": 0.44907617568969727,
"watermark": 1.4312434196472168
"read": 0.3940451145172119,
"watermark": 1.0545539855957031
},
"1707.09725": {
"read": 1.632399320602417,
"watermark": 6.472326278686523
"read": 1.992173433303833,
"watermark": 5.0714006423950195
},
"2201.00021": {
"read": 0.744328498840332,
"watermark": 0.911888599395752
"read": 0.43038320541381836,
"watermark": 0.9083621501922607
},
"2201.00022": {
"read": 0.22876715660095215,
"watermark": 0.8920032978057861
"read": 0.20012378692626953,
"watermark": 0.7139835357666016
},
"2201.00029": {
"read": 0.3686819076538086,
"watermark": 0.11223363876342773
"read": 0.33080124855041504,
"watermark": 0.08133554458618164
},
"2201.00037": {
"read": 0.8160533905029297,
"watermark": 2.120100975036621
"read": 0.6840682029724121,
"watermark": 1.6109654903411865
},
"2201.00069": {
"read": 0.32007670402526855,
"watermark": 1.1236357688903809
"read": 0.2880227565765381,
"watermark": 0.9331471920013428
},
"2201.00151": {
"read": 5.836317300796509,
"watermark": 16.197925567626953
"read": 5.154994010925293,
"watermark": 13.142579793930054
},
"2201.00178": {
"read": 0.33651185035705566,
"watermark": 1.1913495063781738
"read": 0.337890625,
"watermark": 0.8712546825408936
},
"2201.00200": {
"read": 0.48441600799560547,
"watermark": 0.53401780128479
"read": 0.4142639636993408,
"watermark": 0.4322509765625
},
"2201.00201": {
"read": 0.2817673683166504,
"watermark": 0.7810215950012207
"read": 0.25218749046325684,
"watermark": 0.6003615856170654
},
"2201.00214": {
"read": 20.626332998275757,
"watermark": 56.71852135658264
"read": 17.280361890792847,
"watermark": 45.37559413909912
},
"GeoTopo-book": {
"read": 4.307093143463135,
"watermark": 13.88920783996582
"read": 4.041578054428101,
"watermark": 11.496635913848877
}
},
"tika": {
Expand Down Expand Up @@ -448,36 +448,36 @@
"GeoTopo-book": 0.9338226113014887
},
"pymupdf": {
"1601.03642": 0.98920850946833,
"1602.06541": 0.9831085375326601,
"1707.09725": 0.9500410452707244,
"2201.00021": 0.9814699454128877,
"2201.00022": 0.9807169262536218,
"2201.00029": 0.9780213199185531,
"2201.00037": 0.9610911701363962,
"2201.00069": 0.9901584319162327,
"2201.00151": 0.9405941590124848,
"2201.00178": 0.9541360733822826,
"2201.00200": 0.9819674770568346,
"2201.00201": 0.9865230158884503,
"2201.00214": 0.9783667787959633,
"GeoTopo-book": 0.9654720602997376
"1601.03642": 0.9891412666231401,
"1602.06541": 0.9830822024204595,
"1707.09725": 0.950309201655275,
"2201.00021": 0.9813663284804848,
"2201.00022": 0.9802966619965753,
"2201.00029": 0.9776700191570882,
"2201.00037": 0.9601950117423229,
"2201.00069": 0.9902783350964871,
"2201.00151": 0.9404806212467183,
"2201.00178": 0.9543100759072476,
"2201.00200": 0.9818752599310429,
"2201.00201": 0.9864366899689843,
"2201.00214": 0.9784680209521233,
"GeoTopo-book": 0.9658770842721947
},
"pypdf2": {
"1601.03642": 0.988204501231209,
"1602.06541": 0.980342863635718,
"1707.09725": 0.9402626747634515,
"2201.00021": 0.9670789764515855,
"2201.00022": 0.97262612848869,
"2201.00029": 0.9768263473053892,
"2201.00037": 0.9390333156211463,
"2201.00069": 0.9634704048681239,
"2201.00151": 0.9336876943135092,
"2201.00178": 0.9294026050543948,
"2201.00200": 0.9749882273797511,
"2201.00201": 0.9825060668375452,
"2201.00214": 0.9725277368821147,
"GeoTopo-book": 0.8648303132107446
"1601.03642": 0.9882405605964084,
"1602.06541": 0.9804445075623092,
"1707.09725": 0.940554454886422,
"2201.00021": 0.9670187219754766,
"2201.00022": 0.9726852781683069,
"2201.00029": 0.9767748114449898,
"2201.00037": 0.9389782426736301,
"2201.00069": 0.963615494442941,
"2201.00151": 0.9335953951986522,
"2201.00178": 0.9295827985946015,
"2201.00200": 0.9749233170101705,
"2201.00201": 0.9824198955328604,
"2201.00214": 0.9727259608446774,
"GeoTopo-book": 0.8653162506825754
},
"tika": {
"1601.03642": 0.9558922725104059,
Expand Down
8 changes: 8 additions & 0 deletions read/results/pymupdf/1601.03642.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ optimization technique called gradient descent. The gradient
descent algorithm takes a function which has to be derivable,
starts at any point of the surface of this error function and
arXiv:1601.03642v1 [cs.CV] 12 Jan 2016

2
makes a step in the direction which goes downwards. Hence
it tries to find a minimum of this high-dimensional function.
Expand Down Expand Up @@ -186,6 +187,7 @@ choice of features it is possible to separate the general style of
an image in terms of local image appearance from the content
of an image. They support their claim by applying the style of
different artists to an arbitrary image of their choice.

3
(a) Original Image
(b) Style image
Expand Down Expand Up @@ -266,6 +268,7 @@ With that training data, the models can generate similar texts.
New works which look like Shakespeare plays, new Wikipedia
articles, new Linux code and new papers about algebraic
geometry can thus automatically be generated. At a first

4
glance, they do look authentic. The syntax was mostly used
correctly, the formatting looks as expected, the sentences are
Expand Down Expand Up @@ -365,6 +368,7 @@ can be found at [Vit15].
C. Audio Synthesization
Audio synthesization is generating new audio files. This can
either be music or speech. With the techniques described before,

5
neural networks can be trained to generate music note by note.
However, it is desirable to allow multiple notes being played
Expand Down Expand Up @@ -547,6 +551,7 @@ W.H.Freeman & Co Ltd, 1976.
M. D. Zeiler and R. Fergus, “Visualizing and understanding con-
volutional networks,” in Computer Vision–ECCV 2014. Springer,
2014, pp. 818–833.

6
APPENDIX A
AUTOMATICALLY GENERATED TEXTS
Expand Down Expand Up @@ -592,6 +597,7 @@ was swear to advance to the resources for those Socialism’s rule,
was starting to signing a major tripad of aid exile.]]
C. Linux Code, 1
/*

7
* Increment the size file of the new incorrect UI_FILTER group information
* of the size generatively.
Expand Down Expand Up @@ -657,6 +663,7 @@ Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/multi.h>

8
#include <linux/ckevent.h>
#include <asm/io.h>
Expand Down Expand Up @@ -689,3 +696,4 @@ PUT_PARAM_RAID(2, sel) = get_state_state();
set_pid_sum((unsigned long)state, current_state_str(),
(unsigned long)-1->lr_full; low;
}

Loading

0 comments on commit f126d3c

Please sign in to comment.