Skip to content

Commit

Permalink
test 707560
Browse files Browse the repository at this point in the history
MuPDF redaction used to leave behind glyph fragments that have an empty bounding box. This frequently happens in writing systems such as Devanagari.
This test is a regression check that MuPDF no longer exhibits this behavior.
  • Loading branch information
JorjMcKie committed Mar 29, 2024
1 parent ef9bca2 commit 82b8b25
Showing 1 changed file with 43 additions and 0 deletions.
43 changes: 43 additions & 0 deletions tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,49 @@ def test_2957_2():
bbox0 = fitz.Rect(w0[:4]).irect # its IRect coordinates
assert bbox0 == bbox1 # must be same coordinates


def test_707560():
    """Regression test for redaction of glyphs with an empty-width bbox.

    See https://bugs.ghostscript.com/show_bug.cgi?id=707560

    A multi-script text is laid out via a Story into an in-memory PDF.
    The whole first page is then redacted, and the test asserts that no
    extractable text is left on it afterwards.
    """
    # Make text that will contain characters with an empty bbox.
    greetings = (
        "Hello, World!",  # english
        "Hallo, Welt!",  # german
        "سلام دنیا!",  # persian
        "வணக்கம், உலகம்!",  # tamil
        "สวัสดีชาวโลก!",  # thai
        "Привіт Світ!",  # ukrainian
        "שלום עולם!",  # hebrew
        "ওহে বিশ্ব!",  # bengali
        "你好世界!",  # chinese
        "こんにちは世界!",  # japanese
        "안녕하세요, 월드!",  # korean
        "नमस्कार, विश्व !",  # sanskrit
        "हैलो वर्ल्ड!",  # hindi
    )
    # join() consumes the tuple directly; no intermediate list is needed.
    story_source = " ... ".join(greetings)
    where = (50, 50, 400, 500)  # rectangle on each page that receives text

    # Write the story to an in-memory PDF, adding pages until place()
    # reports that nothing is left over.
    story = fitz.Story(story_source)
    bio = io.BytesIO()
    writer = fitz.DocumentWriter(bio)
    more = True
    while more:
        dev = writer.begin_page(fitz.paper_rect("a4"))
        more, _ = story.place(where)
        story.draw(dev)
        writer.end_page()
    writer.close()

    # Re-open the generated PDF and make sure there is text to redact,
    # otherwise the final assertion would pass vacuously.
    doc = fitz.open("pdf", bio)
    page = doc[0]
    extracted = page.get_text()
    assert extracted, "Unexpected: test page has no text."

    # Redact the complete page: no text at all may survive.
    page.add_redact_annot(page.rect)
    page.apply_redactions()
    assert not page.get_text(), "Unexpected: text not fully redacted."


def test_3070():
with fitz.open(os.path.abspath(f'{__file__}/../../tests/resources/test_3070.pdf')) as pdf:
links = pdf[0].get_links()
Expand Down

0 comments on commit 82b8b25

Please sign in to comment.