Skip to content

Commit

Permalink
tests/test_textextract.py: check we can use direct mupdf fns with reb…
Browse files Browse the repository at this point in the history
…ase.
  • Loading branch information
julian-smith-artifex-com committed Sep 19, 2023
1 parent f1cef42 commit 4809847
Showing 1 changed file with 30 additions and 2 deletions.
32 changes: 30 additions & 2 deletions tests/test_textextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

import fitz

scriptdir = os.path.abspath(os.path.dirname(__file__))
# Absolute path of the directory one level above the directory containing
# this file (presumably the root of the PyMuPDF checkout - confirm).
pymupdfdir = os.path.abspath(f'{__file__}/../..')
# The `tests` directory, expressed relative to `pymupdfdir`.
scriptdir = f'{pymupdfdir}/tests'
# Path of the `symbol-list.pdf` file inside the tests' `resources` directory.
filename = os.path.join(scriptdir, "resources", "symbol-list.pdf")


Expand Down Expand Up @@ -70,4 +71,31 @@ def _test_extract3():
t = time.time() - t0
print(f't={t}')
sys.stdout.flush()


def test_extract4():
    '''
    Rebased-specific.

    Writes the page at index 4 of tests/resources/2.pdf out as HTML twice:
    first using the string returned by `page.get_text('html')`, then by
    running the page through a `fitz.mupdf.FzDocumentWriter` using the
    `fitz.mupdf` functions directly. Both files are created in the current
    working directory.

    Returns immediately when `fitz` has no `mupdf` attribute.
    '''
    if not hasattr(fitz, 'mupdf'):
        return
    mupdf = fitz.mupdf

    pdf_path = f'{pymupdfdir}/tests/resources/2.pdf'
    doc = fitz.open(pdf_path)
    page = doc[4]

    # First output: the HTML text produced by get_text().
    stext_out = 'test_stext.html'
    html = page.get_text('html')
    with open(stext_out, 'w') as stream:
        stream.write(html)
    print(f'Have written to: {stext_out}')

    # Second output: drive a document writer by hand - begin a page sized to
    # the page's bounds, run the page onto the returned device, end the page,
    # then close the writer.
    extract_out = 'test_extract.html'
    writer = mupdf.FzDocumentWriter(
        extract_out,
        'html',
        mupdf.FzDocumentWriter.PathType_DOCX,
    )
    bounds = mupdf.fz_bound_page(page)
    device = mupdf.fz_begin_page(writer, bounds)
    matrix = mupdf.FzMatrix()
    cookie = mupdf.FzCookie()
    mupdf.fz_run_page(page, device, matrix, cookie)
    mupdf.fz_end_page(writer)
    mupdf.fz_close_document_writer(writer)
    print(f'Have written to: {extract_out}')

0 comments on commit 4809847

Please sign in to comment.