Some provisional definitions

To cope with upcoming features of MuPDF, we make a few provisional definitions for flags that will be introduced with them. This avoids complex version checks in our code. Also adjust a table markdown output test: we no longer replace line breaks with simple spaces, but with HTML line breaks `<br>`.
pymupdf · Dec 12, 2024 · 53f8a72 · 53f8a72
1 parent 902e92c
commit 53f8a72
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 20 deletions.
diff --git a/src/__init__.py b/src/__init__.py
@@ -8314,7 +8314,7 @@ def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=
         #-------------------------------------------------------------
         resources = mupdf.pdf_dict_get_inheritable(tpageref, PDF_NAME('Resources'))
         if not resources.m_internal:
-            resources = mupdf.pdf_dict_put_dict(tpageref,PDF_NAME('Resources'),5) 
+            resources = mupdf.pdf_dict_put_dict(tpageref,PDF_NAME('Resources'),5)
         subres = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
         if not subres.m_internal:
             subres = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 5)
@@ -9199,7 +9199,7 @@ def remove_rotation(self):
                 pass
 
         for xref, rect in widgets:  # modify field rectangles
-            widget = page.load_widget(xref)
+            widget = self.load_widget(xref)
             widget.rect = r
             widget.update()
         return rot  # the inverse of the generated derotation matrix
@@ -13487,6 +13487,13 @@ def width(self):
 TEXT_OUTPUT_XML = 3
 TEXT_OUTPUT_XHTML = 4
 
+TEXT_STRIKEOUT = 1
+TEXT_UNDERLINE = 2
+TEXT_SYNTHETIC = 4
+TEXT_BOLD = 8
+TEXT_FILLED = 16
+TEXT_STROKED = 32
+
 TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
 TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
 TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
@@ -13507,6 +13514,7 @@ def width(self):
     TEXT_COLLECT_VECTORS = 1024
     TEXT_IGNORE_ACTUALTEXT = 2048
     TEXT_STEXT_SEGMENT = 4096
+TEXT_COLLECT_FLAGS = 32768  # mupdf.FZ_STEXT_COLLECT_FLAGS
 
 TEXTFLAGS_WORDS = (0
         | TEXT_PRESERVE_LIGATURES
@@ -16509,6 +16517,7 @@ def __str__(self):
         font_flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
         origin = mupdf.FzPoint(ch.m_internal.origin)
         style.size = ch.m_internal.size
+        style.font_flags = font_flags
         style.flags = ch.m_internal.flags
         style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
         if THIS_MUPDF >= MUPDF1250:
@@ -16519,7 +16528,8 @@ def __str__(self):
         style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
         style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
 
-        if (0
+        if (
+            0
             or style.size != old_style.size
             or style.bidi != old_style.bidi
             or style.font_flags != old_style.font_flags
@@ -16528,7 +16538,7 @@ def __str__(self):
             or style.color != old_style.color
             or style.opacity != old_style.opacity
             or style.font != old_style.font
-            ):
+        ):
             if old_style.size > 0:
                 # not first one, output previous
                 if raw:
@@ -16562,24 +16572,24 @@ def __str__(self):
             span["descender"] = desc
             span["opacity"] = style.opacity
             # add more keys depending on MuPDF version
-            if THIS_MUPDF >= MUPDF1250:  #separate if because not flags-dependent
+            if THIS_MUPDF >= MUPDF1250:  # separate if because not flags-dependent
                 span["opacity"] = style.opacity
                 # rest of keys only make sense for FZ_STEXT_COLLECT_FLAGS
-                if dev_flags & mupdf.FZ_STEXT_COLLECT_FLAGS:
-                    span["underline"] = bool(style.flags & mupdf.FZ_STEXT_UNDERLINE)
-                    span["strikeout"] = bool(style.flags & mupdf.FZ_STEXT_STRIKEOUT)
+                if dev_flags & TEXT_COLLECT_FLAGS:
+                    span["underline"] = bool(style.flags & TEXT_UNDERLINE)
+                    span["strikeout"] = bool(style.flags & TEXT_STRIKEOUT)
                 else:
                     span["underline"] = None
                     span["strikeout"] = None
 
             if THIS_MUPDF > MUPDF1251:
-                if dev_flags & mupdf.FZ_STEXT_COLLECT_FLAGS:
-                    span["bold"] = bool(style.flags & mupdf.FZ_STEXT_BOLD)
+                if dev_flags & TEXT_COLLECT_FLAGS:
+                    span["bold"] = bool(style.flags & TEXT_BOLD)
                 else:
                     span["bold"] = None
-                span["filled"] = bool(style.flags & mupdf.FZ_STEXT_FILLED)
-                span["stroked"] = bool(style.flags & mupdf.FZ_STEXT_STROKED)
-                span["clipped"] = bool(style.flags & mupdf.FZ_STEXT_CLIPPED)
+                span["filled"] = bool(style.flags & TEXT_FILLED)
+                span["stroked"] = bool(style.flags & TEXT_STROKED)
+                span["clipped"] = bool(style.flags & TEXT_CLIPPED)
 
             # Need to be careful here - doing 'old_style=style' does a shallow
             # copy, but we need to keep old_style as a distinct instance.
@@ -16594,7 +16604,7 @@ def __str__(self):
             char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
             char_dict[dictkey_bbox] = JM_py_from_rect(r)
             if THIS_MUPDF >= MUPDF1250:
-                char_dict["synthetic"] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)
+                char_dict["synthetic"] = bool(ch.m_internal.flags & TEXT_SYNTHETIC)
             char_dict[dictkey_c] = chr(ch.m_internal.c)
 
             if char_list is None:

diff --git a/src/extra.i b/src/extra.i
@@ -3125,7 +3125,7 @@ mupdf::FzRect JM_make_spanlist(
                 DICT_SETITEMSTR_DROP(span, "opacity", Py_BuildValue("f", style.opacity));
             #endif
 
-            // rest of keys only make sense if FZ_STEXT_COLLECT_FLAGS was set
+            // rest of keys only make sense if FZ_STEXT_COLLECT_FLAGS (32768) was set
             #if (THIS_MUPDF >= MUPDF1250)
                 if (dev_flags & 32768)
                 {
@@ -3139,9 +3139,9 @@ mupdf::FzRect JM_make_spanlist(
                 }
             #endif
             #if (THIS_MUPDF > MUPDF1251)
-                if (dev_flags & FZ_STEXT_COLLECT_FLAGS)
+                if (dev_flags & 32768) // FZ_STEXT_COLLECT_FLAGS = 32768
                 {
-                    DICT_SETITEMSTR_DROP(span, "bold", JM_BOOL(style.flags & FZ_STEXT_BOLD));
+                    DICT_SETITEMSTR_DROP(span, "bold", JM_BOOL(style.flags & 8)); // FZ_STEXT_BOLD = 8
                 }
                 else
                 {

diff --git a/tests/test_tables.py b/tests/test_tables.py
@@ -288,10 +288,10 @@ def test_markdown():
     text = (
         "|Header1|Header2|Header3|\n"
         "|---|---|---|\n"
-        "|Col11 Col12|Col21 Col22|Col31 Col32 Col33|\n"
-        "|Col13|Col23|Col34 Col35|\n"
+        "|Col11 Col12|Col21<br>Col22|Col31<br>Col32<br>Col33|\n"
+        "|Col13|Col23|Col34<br>Col35|\n"
         "|Col14|Col24|Col36|\n"
-        "|Col15|Col25 Col26||\n\n"
+        "|Col15|Col25<br>Col26||\n\n"
     )
     assert tab.to_markdown() == text