Process tags and references in Tokenizer

vkbo · Oct 22, 2024 · bd5442c · bd5442c
1 parent c7554f2
commit bd5442c
Show file tree

Hide file tree

Showing 16 changed files with 588 additions and 465 deletions.
diff --git a/novelwriter/formats/shared.py b/novelwriter/formats/shared.py
@@ -86,8 +86,12 @@ class TextFmt(IntEnum):
     SUB_E = 14  # End subscript
     COL_B = 15  # Begin colour
     COL_E = 16  # End colour
-    FNOTE = 17  # Footnote marker
-    STRIP = 18  # Strip the format code
+    ANM_B = 17  # Begin anchor name
+    ANM_E = 18  # End anchor name
+    HRF_B = 19  # Begin href link
+    HRF_E = 20  # End href link
+    FNOTE = 21  # Footnote marker
+    STRIP = 22  # Strip the format code
 
 
 class BlockTyp(IntEnum):

diff --git a/novelwriter/formats/todocx.py b/novelwriter/formats/todocx.py
@@ -38,7 +38,7 @@
 
 from novelwriter import __version__
 from novelwriter.common import firstFloat, xmlSubElem
-from novelwriter.constants import nwHeadFmt, nwKeyWords, nwLabels, nwStyles
+from novelwriter.constants import nwHeadFmt, nwStyles
 from novelwriter.core.project import NWProject
 from novelwriter.formats.shared import BlockFmt, BlockTyp, T_Formats, TextFmt
 from novelwriter.formats.tokenizer import Tokenizer
@@ -119,12 +119,6 @@ def _docXCol(color: QColor) -> str:
 S_HEAD  = "Header"
 S_FNOTE = "FootnoteText"
 
-# Colours
-COL_DIALOG_M = "2a6099"
-COL_DIALOG_A = "813709"
-COL_META_TXT = "813709"
-COL_MARK_TXT = "ffffa6"
-
 
 class DocXXmlFile(NamedTuple):
 
@@ -292,8 +286,7 @@ def doConvert(self) -> None:
                 self._processFragments(par, S_META, tText, tFormat)
 
             elif tType == BlockTyp.KEYWORD:
-                tTemp, tFmt = self._formatKeywords(tText)
-                self._processFragments(par, S_META, tTemp, tFmt)
+                self._processFragments(par, S_META, tText, tFormat)
 
         return
 
@@ -369,22 +362,6 @@ def xmlToZip(name: str, xObj: ET.Element, zipObj: ZipFile) -> None:
     #  Internal Functions
     ##
 
-    def _formatKeywords(self, text: str) -> tuple[str, T_Formats]:
-        """Apply formatting to keywords."""
-        valid, bits, _ = self._project.index.scanThis("@"+text)
-        if not valid or not bits or bits[0] not in nwLabels.KEY_NAME:
-            return "", []
-
-        rTxt = f"{self._localLookup(nwLabels.KEY_NAME[bits[0]])}: "
-        rFmt: T_Formats = [(0, TextFmt.B_B, ""), (len(rTxt) - 1, TextFmt.B_E, "")]
-        if len(bits) > 1:
-            if bits[0] == nwKeyWords.TAG_KEY:
-                rTxt += bits[1]
-            else:
-                rTxt += ", ".join(bits[1:])
-
-        return rTxt, rFmt
-
     def _processFragments(
         self, par: DocXParagraph, pStyle: str, text: str, tFmt: T_Formats | None = None
     ) -> None:
@@ -465,7 +442,7 @@ def _textRunToXml(self, text: str, fmt: int, fClass: str = "") -> ET.Element:
             xmlSubElem(rPr, _wTag("u"), attrib={_wTag("val"): "single"})
         if fmt & X_MRK:
             xmlSubElem(rPr, _wTag("shd"), attrib={
-                _wTag("fill"): COL_MARK_TXT, _wTag("val"): "clear",
+                _wTag("fill"): _docXCol(self._theme.highlight), _wTag("val"): "clear",
             })
         if fmt & X_DEL:
             xmlSubElem(rPr, _wTag("strike"))

diff --git a/novelwriter/formats/tohtml.py b/novelwriter/formats/tohtml.py
@@ -30,7 +30,7 @@
 from time import time
 
 from novelwriter.common import formatTimeStamp
-from novelwriter.constants import nwHeadFmt, nwHtmlUnicode, nwKeyWords, nwLabels
+from novelwriter.constants import nwHeadFmt, nwHtmlUnicode
 from novelwriter.core.project import NWProject
 from novelwriter.formats.shared import BlockFmt, BlockTyp, T_Formats, TextFmt, stripEscape
 from novelwriter.formats.tokenizer import Tokenizer
@@ -48,6 +48,8 @@
     TextFmt.SUP_B: (TextFmt.SUP_E, "<sup>"),
     TextFmt.SUB_B: (TextFmt.SUB_E, "<sub>"),
     TextFmt.COL_B: (TextFmt.COL_E, "<span style='color: {0}'>"),
+    TextFmt.ANM_B: (TextFmt.ANM_E, "<a name='{0}'>"),
+    TextFmt.HRF_B: (TextFmt.HRF_E, "<a href='{0}'>"),
 }
 
 # Each closer tag, with the id of its corresponding opener and tag format
@@ -60,6 +62,8 @@
     TextFmt.SUP_E: (TextFmt.SUP_B, "</sup>"),
     TextFmt.SUB_E: (TextFmt.SUB_B, "</sub>"),
     TextFmt.COL_E: (TextFmt.COL_B, "</span>"),
+    TextFmt.ANM_E: (TextFmt.ANM_B, "</a>"),
+    TextFmt.HRF_E: (TextFmt.HRF_B, "</a>"),
 }
 
 # Empty HTML tag record
@@ -155,21 +159,21 @@ def doConvert(self) -> None:
         lines = []
         tHandle = self._handle
 
-        for tType, nHead, tText, tFormat, tStyle in self._blocks:
+        for tType, nHead, tText, tFmt, tStyle in self._blocks:
 
             # Replace < and > with HTML entities
-            if tFormat:
+            if tFmt:
                 # If we have formatting, we must recompute the locations
                 cText = []
                 i = 0
                 for c in tText:
                     if c == "<":
                         cText.append("&lt;")
-                        tFormat = [(p + 3 if p > i else p, f, k) for p, f, k in tFormat]
+                        tFmt = [(p + 3 if p > i else p, f, k) for p, f, k in tFmt]
                         i += 4
                     elif c == ">":
                         cText.append("&gt;")
-                        tFormat = [(p + 3 if p > i else p, f, k) for p, f, k in tFormat]
+                        tFmt = [(p + 3 if p > i else p, f, k) for p, f, k in tFmt]
                         i += 4
                     else:
                         cText.append(c)
@@ -221,7 +225,7 @@ def doConvert(self) -> None:
 
             # Process Text Type
             if tType == BlockTyp.TEXT:
-                lines.append(f"<p{hStyle}>{self._formatText(tText, tFormat)}</p>\n")
+                lines.append(f"<p{hStyle}>{self._formatText(tText, tFmt)}</p>\n")
 
             elif tType == BlockTyp.TITLE:
                 tHead = tText.replace(nwHeadFmt.BR, "<br>")
@@ -250,13 +254,10 @@ def doConvert(self) -> None:
                 lines.append(f"<p class='skip'{hStyle}>&nbsp;</p>\n")
 
             elif tType == BlockTyp.COMMENT:
-                lines.append(f"<p class='comment'>{self._formatText(tText, tFormat)}</p>\n")
+                lines.append(f"<p class='comment'{hStyle}>{self._formatText(tText, tFmt)}</p>\n")
 
             elif tType == BlockTyp.KEYWORD:
-                tag, text = self._formatKeywords(tText)
-                kClass = f" class='meta meta-{tag}'" if tag else ""
-                tTemp = f"<p{kClass}{hStyle}>{text}</p>\n"
-                lines.append(tTemp)
+                lines.append(f"<p class='meta'{hStyle}>{self._formatText(tText, tFmt)}</p>\n")
 
         self._result = "".join(lines)
         self._fullHTML.append(self._result)
@@ -431,9 +432,8 @@ def getStyleSheet(self) -> list[str]:
             mScale, mScale
         ))
 
-        styles.append("a {color: rgb(66, 113, 174);}")
-        styles.append("mark {background: rgb(255, 255, 166);}")
-        styles.append(".keyword {color: rgb(245, 135, 31); font-weight: bold;}")
+        styles.append("a {{color: {0:s};}}".format(self._theme.head.name(QtHexRgb)))
+        styles.append("mark {{background: {0:s};}}".format(self._theme.highlight.name(QtHexRgb)))
 
         return styles
 
@@ -455,6 +455,8 @@ def _formatText(self, text: str, tFmt: T_Formats) -> str:
                 if not state.get(fmt, True):
                     if fmt == TextFmt.COL_B and (color := self._classes.get(data)):
                         tags.append((pos, m[1].format(color.name(QtHexRgb))))
+                    elif fmt in (TextFmt.ANM_B, TextFmt.HRF_B):
+                        tags.append((pos, m[1].format(data or "#")))
                     else:
                         tags.append((pos, m[1]))
                     state[fmt] = True
@@ -488,23 +490,3 @@ def _formatText(self, text: str, tFmt: T_Formats) -> str:
         temp = temp.replace("\n", "<br>")
 
         return stripEscape(temp)
-
-    def _formatKeywords(self, text: str) -> tuple[str, str]:
-        """Apply HTML formatting to keywords."""
-        valid, bits, _ = self._project.index.scanThis("@"+text)
-        if not valid or not bits or bits[0] not in nwLabels.KEY_NAME:
-            return "", ""
-
-        result = f"<span class='keyword'>{self._localLookup(nwLabels.KEY_NAME[bits[0]])}:</span> "
-        if len(bits) > 1:
-            if bits[0] == nwKeyWords.TAG_KEY:
-                one, two = self._project.index.parseValue(bits[1])
-                result += f"<a class='tag' name='tag_{one}'>{one}</a>"
-                if two:
-                    result += f" | <span class='optional'>{two}</a>"
-            else:
-                result += ", ".join(
-                    f"<a class='tag' href='#tag_{t}'>{t}</a>" for t in bits[1:]
-                )
-
-        return bits[0][1:], result
diff --git a/novelwriter/formats/tokenizer.py b/novelwriter/formats/tokenizer.py
@@ -127,10 +127,11 @@ def __init__(self, project: NWProject) -> None:
         self._doSynopsis   = False   # Also process synopsis comments
         self._doComments   = False   # Also process comments
         self._doKeywords   = False   # Also process keywords like tags and references
-        self._skipKeywords = set()   # Keywords to ignore
         self._keepBreaks   = True    # Keep line breaks in paragraphs
         self._defaultAlign = "left"  # The default text alignment
 
+        self._skipKeywords: set[str] = set()  # Keywords to ignore
+
         # Other Setting
         self._theme = TextDocumentTheme()
         self._classes: dict[str, QColor] = {}
@@ -464,6 +465,9 @@ def initDocument(self) -> None:
         self._classes["comment"] = self._theme.comment
         self._classes["dialog"] = self._theme.dialog
         self._classes["altdialog"] = self._theme.altdialog
+        self._classes["tag"] = self._theme.tag
+        self._classes["keyword"] = self._theme.keyword
+        self._classes["optional"] = self._theme.optional
         return
 
     def addRootHeading(self, tHandle: str) -> None:
@@ -614,7 +618,9 @@ def tokenizeText(self) -> None:
                 if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT, nwComment.PLAIN):
                     bStyle = COMMENT_STYLE[cStyle]
                     tLine, tFmt = self._formatComment(bStyle, cKey, cText)
-                    blocks.append((BlockTyp.COMMENT, nHead, tLine, tFmt, sAlign))
+                    blocks.append((
+                        BlockTyp.COMMENT, nHead, tLine, tFmt, sAlign
+                    ))
                     if self._keepRaw:
                         tmpMarkdown.append(f"{aLine}\n")
 
@@ -633,31 +639,14 @@ def tokenizeText(self) -> None:
                 if not self._doKeywords:
                     continue
 
-                valid, bits, _ = self._project.index.scanThis(aLine)
-                if (
-                    valid and bits and bits[0] in nwLabels.KEY_NAME
-                    and bits[0] not in self._skipKeywords
-                ):
+                tLine, tFmt = self._formatMeta(aLine)
+                if tLine:
                     blocks.append((
-                        BlockTyp.KEYWORD, nHead, aLine[1:].strip(), [], sAlign
+                        BlockTyp.KEYWORD, nHead, tLine, tFmt, sAlign
                     ))
                     if self._keepRaw:
                         tmpMarkdown.append(f"{aLine}\n")
 
-                # valid, bits, _ = self._project.index.scanThis("@"+text)
-                # if not valid or not bits or bits[0] not in nwLabels.KEY_NAME:
-                #     return "", []
-
-                # rTxt = f"{self._localLookup(nwLabels.KEY_NAME[bits[0]])}: "
-                # rFmt: T_Formats = [(0, TextFmt.B_B, ""), (len(rTxt) - 1, TextFmt.B_E, "")]
-                # if len(bits) > 1:
-                #     if bits[0] == nwKeyWords.TAG_KEY:
-                #         rTxt += bits[1]
-                #     else:
-                #         rTxt += ", ".join(bits[1:])
-
-                # return rTxt, rFmt
-
             elif aLine.startswith(("# ", "#! ")):
                 # Title or Partition Headings
                 # ===========================
@@ -1033,22 +1022,12 @@ def countStats(self) -> None:
                 allChars += nChars
                 allWordChars += nWChars
 
-            elif tType == BlockTyp.COMMENT:
+            elif tType in (BlockTyp.COMMENT, BlockTyp.KEYWORD):
                 words = tText.split()
                 allWords += len(words)
                 allChars += len(tText)
                 allWordChars += len("".join(words))
 
-            elif tType == BlockTyp.KEYWORD:
-                valid, bits, _ = self._project.index.scanThis("@"+tText)
-                if valid and bits:
-                    key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
-                    text = "{0}: {1}".format(key, ", ".join(bits[1:]))
-                    words = text.split()
-                    allWords += len(words)
-                    allChars += len(text)
-                    allWordChars += len("".join(words))
-
         self._counts["titleCount"] = titleCount
         self._counts["paragraphCount"] = paragraphCount
 
@@ -1112,6 +1091,55 @@ def _formatComment(self, style: ComStyle, key: str, text: str) -> tuple[str, T_F
             rFmt.extend((p + shift, f, d) for p, f, d in tFmt)
         return tTxt, rFmt
 
+    def _formatMeta(self, text: str) -> tuple[str, T_Formats]:
+        """Parse a meta line into text and formats; both empty if invalid or skipped."""
+        txt = []
+        fmt = []
+        valid, bits, _ = self._project.index.scanThis(text)
+        if valid and bits and bits[0] in nwLabels.KEY_NAME and bits[0] not in self._skipKeywords:
+            pos = 0
+            lbl = f"{self._localLookup(nwLabels.KEY_NAME[bits[0]])}:"
+            end = len(lbl)
+            fmt = [
+                (pos, TextFmt.B_B, ""), (pos, TextFmt.COL_B, "keyword"),
+                (end, TextFmt.B_E, ""), (end, TextFmt.COL_E, ""),
+            ]
+            txt = [lbl, " "]
+            pos = end + 1  # Account for the space after the label
+
+            if (num := len(bits)) > 1:
+                if bits[0] == nwKeyWords.TAG_KEY:
+                    one, two = self._project.index.parseValue(bits[1])
+                    end = pos + len(one)
+                    fmt.append((pos, TextFmt.COL_B, "tag"))
+                    fmt.append((pos, TextFmt.ANM_B, f"tag_{one}".lower()))
+                    fmt.append((end, TextFmt.ANM_E, ""))
+                    fmt.append((end, TextFmt.COL_E, ""))
+                    txt.append(one)
+                    pos = end
+                    if two:
+                        txt.append(" | ")
+                        pos += 3  # Length of the " | " separator
+                        end = pos + len(two)
+                        fmt.append((pos, TextFmt.COL_B, "optional"))
+                        fmt.append((end, TextFmt.COL_E, ""))
+                        txt.append(two)
+                        pos = end
+                else:
+                    for n, bit in enumerate(bits[1:], 2):
+                        end = pos + len(bit)
+                        fmt.append((pos, TextFmt.COL_B, "tag"))
+                        fmt.append((pos, TextFmt.HRF_B, f"#tag_{bit}".lower()))
+                        fmt.append((end, TextFmt.HRF_E, ""))
+                        fmt.append((end, TextFmt.COL_E, ""))
+                        txt.append(bit)
+                        pos = end
+                        if n < num:  # Not the last entry, so add a separator
+                            txt.append(", ")
+                            pos += 2  # Length of the ", " separator
+
+        return "".join(txt), fmt
+
     def _extractFormats(
         self, text: str, skip: int = 0, hDialog: bool = False
     ) -> tuple[str, T_Formats]:

diff --git a/novelwriter/formats/tomarkdown.py b/novelwriter/formats/tomarkdown.py
@@ -27,7 +27,7 @@
 
 from pathlib import Path
 
-from novelwriter.constants import nwHeadFmt, nwLabels, nwUnicode
+from novelwriter.constants import nwHeadFmt, nwUnicode
 from novelwriter.core.project import NWProject
 from novelwriter.formats.shared import BlockFmt, BlockTyp, T_Formats, TextFmt
 from novelwriter.formats.tokenizer import Tokenizer
@@ -154,7 +154,8 @@ def doConvert(self) -> None:
                 lines.append(f"{self._formatText(tText, tFormat, mTags)}\n\n")
 
             elif tType == BlockTyp.KEYWORD:
-                lines.append(self._formatKeywords(tText, tStyle))
+                end = "  \n" if tStyle & BlockFmt.Z_BTMMRG else "\n\n"
+                lines.append(f"{self._formatText(tText, tFormat, mTags)}{end}")
 
         self._result = "".join(lines)
         self._fullMD.append(self._result)
@@ -215,19 +216,3 @@ def _formatText(self, text: str, tFmt: T_Formats, tags: dict[TextFmt, str]) -> s
                 md = tags.get(fmt, "")
             temp = f"{temp[:pos]}{md}{temp[pos:]}"
         return temp
-
-    def _formatKeywords(self, text: str, style: BlockFmt) -> str:
-        """Apply Markdown formatting to keywords."""
-        valid, bits, _ = self._project.index.scanThis("@"+text)
-        if not valid or not bits:
-            return ""
-
-        result = ""
-        if bits[0] in nwLabels.KEY_NAME:
-            result += f"**{self._localLookup(nwLabels.KEY_NAME[bits[0]])}:** "
-            if len(bits) > 1:
-                result += ", ".join(bits[1:])
-
-        result += "  \n" if style & BlockFmt.Z_BTMMRG else "\n\n"
-
-        return result