-
Notifications
You must be signed in to change notification settings - Fork 544
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new page method "remove_rotation"
The new method sets a page's rotation to 0 while keeping the page's appearance and content unchanged. This can significantly simplify inclusion of the page in target pages via the Page method "show_pdf_page()". Other changes are additions / corrections to "changes.txt" and improved wording in document.rst and page.rst. Also contained is a fix for issue 3402 (inability to insert/update fields with inter-field calculations).
- Loading branch information
Showing
11 changed files
with
243 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import fitz | ||
|
||
|
||
def gentle_compare(w0, w1, *, tolerance=1e-3):
    """Check lists of "words" extractions for approximate equality.

    * both lists must have same length
    * word items must contain same word strings
    * word rectangles must be approximately equal

    Args:
        w0: first list of word items; each item holds the four rectangle
            coordinates at indices 0-3 and the word string at index 4.
        w1: second list of word items, same layout as ``w0``.
        tolerance: maximum (Euclidean) norm of the difference rectangle of
            two corresponding words. Keyword-only; defaults to 1e-3.

    Returns:
        True if all three conditions hold, otherwise False. The first
        violation found is reported via ``print()``.
    """
    word_count = len(w0)  # number of words
    if word_count != len(w1):
        print(f"different number of words: {word_count}/{len(w1)}")
        return False

    # Lengths are equal here, so zip() pairs every word with its counterpart.
    for i, (item0, item1) in enumerate(zip(w0, w1)):
        if item0[4] != item1[4]:  # word strings must be the same
            print(f"word {i} mismatch")
            return False
        r0 = fitz.Rect(item0[:4])  # rect of first word
        r1 = fitz.Rect(item1[:4])  # rect of second word
        delta = (r1 - r0).norm()  # norm of difference rectangle
        if delta > tolerance:
            print(f"word {i}: rectangle mismatch {delta}")
            return False
    return True
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import os | ||
import fitz | ||
from gentle_compare import gentle_compare | ||
|
||
scriptdir = os.path.dirname(__file__) | ||
|
||
|
||
def test_remove_rotation():
    """Remove rotation verifying identical appearance and text.

    For every page after the first one in "resources/test-2812.pdf", the page
    is rendered and its words are extracted, then ``remove_rotation()`` is
    called. Afterwards the page rotation must be 0, the pixmap digest must be
    unchanged, and the word list must pass ``gentle_compare`` against the
    words extracted before derotation.
    """
    filename = os.path.join(scriptdir, "resources", "test-2812.pdf")

    # Context manager ensures the document is closed even if an assertion fails.
    with fitz.open(filename) as doc:
        # We always create fresh pages (doc[i]) to avoid false positives from
        # cache content.
        # Text on these pages consists of pairwise different strings, sorting
        # by these strings must therefore yield identical bounding boxes.
        for i in range(1, doc.page_count):
            assert doc[i].rotation  # must be a rotated page
            pix0 = doc[i].get_pixmap()  # make image

            # Word rectangles before derotation, multiplied by the page's
            # rotation matrix so they can be compared with the rectangles
            # extracted after derotation.
            words0 = [
                list(fitz.Rect(w[:4]) * doc[i].rotation_matrix) + [w[4]]
                for w in doc[i].get_text("words")
            ]
            words0.sort(key=lambda w: w[4])  # sort by word strings

            # derotate page and confirm nothing else has changed
            doc[i].remove_rotation()
            assert doc[i].rotation == 0
            pix1 = doc[i].get_pixmap()
            words1 = doc[i].get_text("words")
            words1.sort(key=lambda w: w[4])  # sort by word strings
            assert pix1.digest == pix0.digest, f"{pix1.digest}/{pix0.digest}"
            assert gentle_compare(words0, words1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.