Skip to content

Commit

Permalink
Some provisional definitions
Browse files Browse the repository at this point in the history
To cope with upcoming features of MuPDF, we make a few provisional definitions for flags that will be introduced with them.
This will avoid complex version checks in our code.

Also adjust a table markdown output test:
We no longer replace line breaks with simple spaces, but with HTML line breaks `<br>`.
  • Loading branch information
JorjMcKie committed Dec 12, 2024
1 parent 902e92c commit 53f8a72
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 20 deletions.
38 changes: 24 additions & 14 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8314,7 +8314,7 @@ def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=
#-------------------------------------------------------------
resources = mupdf.pdf_dict_get_inheritable(tpageref, PDF_NAME('Resources'))
if not resources.m_internal:
resources = mupdf.pdf_dict_put_dict(tpageref,PDF_NAME('Resources'),5)
resources = mupdf.pdf_dict_put_dict(tpageref,PDF_NAME('Resources'),5)
subres = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
if not subres.m_internal:
subres = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5)
Expand Down Expand Up @@ -9199,7 +9199,7 @@ def remove_rotation(self):
pass

for xref, rect in widgets: # modify field rectangles
widget = page.load_widget(xref)
widget = self.load_widget(xref)
widget.rect = r
widget.update()
return rot # the inverse of the generated derotation matrix
Expand Down Expand Up @@ -13487,6 +13487,13 @@ def width(self):
TEXT_OUTPUT_XML = 3
TEXT_OUTPUT_XHTML = 4

# Provisional character / span style flags. Defined here as plain integers
# (mirroring MuPDF's FZ_STEXT_* character flag bits) so the code can test
# them without checking which MuPDF version provides the named constants.
TEXT_STRIKEOUT = 1
TEXT_UNDERLINE = 2
TEXT_SYNTHETIC = 4
TEXT_BOLD = 8
TEXT_FILLED = 16
TEXT_STROKED = 32
# Was missing although span["clipped"] is computed from it, which would
# raise NameError as soon as that code path becomes active.
TEXT_CLIPPED = 64

TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
Expand All @@ -13507,6 +13514,7 @@ def width(self):
TEXT_COLLECT_VECTORS = 1024
TEXT_IGNORE_ACTUALTEXT = 2048
TEXT_STEXT_SEGMENT = 4096
TEXT_COLLECT_FLAGS = 32768 # mupdf.FZ_STEXT_COLLECT_FLAGS

TEXTFLAGS_WORDS = (0
| TEXT_PRESERVE_LIGATURES
Expand Down Expand Up @@ -16509,6 +16517,7 @@ def __str__(self):
font_flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
origin = mupdf.FzPoint(ch.m_internal.origin)
style.size = ch.m_internal.size
style.font_flags = font_flags
style.flags = ch.m_internal.flags
style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
if THIS_MUPDF >= MUPDF1250:
Expand All @@ -16519,7 +16528,8 @@ def __str__(self):
style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))

if (0
if (
0
or style.size != old_style.size
or style.bidi != old_style.bidi
or style.font_flags != old_style.font_flags
Expand All @@ -16528,7 +16538,7 @@ def __str__(self):
or style.color != old_style.color
or style.opacity != old_style.opacity
or style.font != old_style.font
):
):
if old_style.size > 0:
# not first one, output previous
if raw:
Expand Down Expand Up @@ -16562,24 +16572,24 @@ def __str__(self):
span["descender"] = desc
span["opacity"] = style.opacity
# add more keys depending on MuPDF version
if THIS_MUPDF >= MUPDF1250: #separate if because not flags-dependent
if THIS_MUPDF >= MUPDF1250: # separate if because not flags-dependent
span["opacity"] = style.opacity
# rest of keys only make sense for FZ_STEXT_COLLECT_FLAGS
if dev_flags & mupdf.FZ_STEXT_COLLECT_FLAGS:
span["underline"] = bool(style.flags & mupdf.FZ_STEXT_UNDERLINE)
span["strikeout"] = bool(style.flags & mupdf.FZ_STEXT_STRIKEOUT)
if dev_flags & TEXT_COLLECT_FLAGS:
span["underline"] = bool(style.flags & TEXT_UNDERLINE)
span["strikeout"] = bool(style.flags & TEXT_STRIKEOUT)
else:
span["underline"] = None
span["strikeout"] = None

if THIS_MUPDF > MUPDF1251:
if dev_flags & mupdf.FZ_STEXT_COLLECT_FLAGS:
span["bold"] = bool(style.flags & mupdf.FZ_STEXT_BOLD)
if dev_flags & TEXT_COLLECT_FLAGS:
span["bold"] = bool(style.flags & TEXT_BOLD)
else:
span["bold"] = None
span["filled"] = bool(style.flags & mupdf.FZ_STEXT_FILLED)
span["stroked"] = bool(style.flags & mupdf.FZ_STEXT_STROKED)
span["clipped"] = bool(style.flags & mupdf.FZ_STEXT_CLIPPED)
span["filled"] = bool(style.flags & TEXT_FILLED)
span["stroked"] = bool(style.flags & TEXT_STROKED)
span["clipped"] = bool(style.flags & TEXT_CLIPPED)

# Need to be careful here - doing 'old_style=style' does a shallow
# copy, but we need to keep old_style as a distinct instance.
Expand All @@ -16594,7 +16604,7 @@ def __str__(self):
char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
char_dict[dictkey_bbox] = JM_py_from_rect(r)
if THIS_MUPDF >= MUPDF1250:
char_dict["synthetic"] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)
char_dict["synthetic"] = bool(ch.m_internal.flags & TEXT_SYNTHETIC)
char_dict[dictkey_c] = chr(ch.m_internal.c)

if char_list is None:
Expand Down
6 changes: 3 additions & 3 deletions src/extra.i
Original file line number Diff line number Diff line change
Expand Up @@ -3125,7 +3125,7 @@ mupdf::FzRect JM_make_spanlist(
DICT_SETITEMSTR_DROP(span, "opacity", Py_BuildValue("f", style.opacity));
#endif

// rest of keys only make sense if FZ_STEXT_COLLECT_FLAGS was set
// rest of keys only make sense if FZ_STEXT_COLLECT_FLAGS (32768) was set
#if (THIS_MUPDF >= MUPDF1250)
if (dev_flags & 32768)
{
Expand All @@ -3139,9 +3139,9 @@ mupdf::FzRect JM_make_spanlist(
}
#endif
#if (THIS_MUPDF > MUPDF1251)
if (dev_flags & FZ_STEXT_COLLECT_FLAGS)
if (dev_flags & 32768) // FZ_STEXT_COLLECT_FLAGS = 32768
{
DICT_SETITEMSTR_DROP(span, "bold", JM_BOOL(style.flags & FZ_STEXT_BOLD));
DICT_SETITEMSTR_DROP(span, "bold", JM_BOOL(style.flags & 8)); // FZ_STEXT_BOLD = 8
}
else
{
Expand Down
6 changes: 3 additions & 3 deletions tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,10 @@ def test_markdown():
text = (
"|Header1|Header2|Header3|\n"
"|---|---|---|\n"
"|Col11 Col12|Col21 Col22|Col31 Col32 Col33|\n"
"|Col13|Col23|Col34 Col35|\n"
"|Col11 Col12|Col21<br>Col22|Col31<br>Col32<br>Col33|\n"
"|Col13|Col23|Col34<br>Col35|\n"
"|Col14|Col24|Col36|\n"
"|Col15|Col25 Col26||\n\n"
"|Col15|Col25<br>Col26||\n\n"
)
assert tab.to_markdown() == text

Expand Down

0 comments on commit 53f8a72

Please sign in to comment.