Skip to content

Commit

Permalink
Process tags and references in Tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
vkbo committed Oct 22, 2024
1 parent c7554f2 commit bd5442c
Show file tree
Hide file tree
Showing 16 changed files with 588 additions and 465 deletions.
8 changes: 6 additions & 2 deletions novelwriter/formats/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,12 @@ class TextFmt(IntEnum):
SUB_E = 14 # End subscript
COL_B = 15 # Begin colour
COL_E = 16 # End colour
FNOTE = 17 # Footnote marker
STRIP = 18 # Strip the format code
ANM_B = 17 # Begin anchor name
ANM_E = 18 # End anchor name
HRF_B = 19 # Begin href link
HRF_E = 20 # End href link
FNOTE = 21 # Footnote marker
STRIP = 22 # Strip the format code


class BlockTyp(IntEnum):
Expand Down
29 changes: 3 additions & 26 deletions novelwriter/formats/todocx.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from novelwriter import __version__
from novelwriter.common import firstFloat, xmlSubElem
from novelwriter.constants import nwHeadFmt, nwKeyWords, nwLabels, nwStyles
from novelwriter.constants import nwHeadFmt, nwStyles
from novelwriter.core.project import NWProject
from novelwriter.formats.shared import BlockFmt, BlockTyp, T_Formats, TextFmt
from novelwriter.formats.tokenizer import Tokenizer
Expand Down Expand Up @@ -119,12 +119,6 @@ def _docXCol(color: QColor) -> str:
S_HEAD = "Header"
S_FNOTE = "FootnoteText"

# Colours
COL_DIALOG_M = "2a6099"
COL_DIALOG_A = "813709"
COL_META_TXT = "813709"
COL_MARK_TXT = "ffffa6"


class DocXXmlFile(NamedTuple):

Expand Down Expand Up @@ -292,8 +286,7 @@ def doConvert(self) -> None:
self._processFragments(par, S_META, tText, tFormat)

elif tType == BlockTyp.KEYWORD:
tTemp, tFmt = self._formatKeywords(tText)
self._processFragments(par, S_META, tTemp, tFmt)
self._processFragments(par, S_META, tText, tFormat)

return

Expand Down Expand Up @@ -369,22 +362,6 @@ def xmlToZip(name: str, xObj: ET.Element, zipObj: ZipFile) -> None:
# Internal Functions
##

def _formatKeywords(self, text: str) -> tuple[str, T_Formats]:
    """Build the display text and formats for a keyword line.

    The text is scanned with a leading "@" prepended. If the scan is
    not valid, yields no bits, or the keyword has no entry in
    nwLabels.KEY_NAME, an empty string and an empty format list are
    returned. Otherwise the text is the label from self._localLookup
    followed by ": " and the values, and the formats mark the label
    and its colon as bold.
    """
    valid, bits, _ = self._project.index.scanThis("@"+text)
    if not (valid and bits and bits[0] in nwLabels.KEY_NAME):
        return "", []

    keyword, values = bits[0], bits[1:]
    label = f"{self._localLookup(nwLabels.KEY_NAME[keyword])}: "
    # Bold ends just before the trailing space of the label
    formats: T_Formats = [(0, TextFmt.B_B, ""), (len(label) - 1, TextFmt.B_E, "")]

    if not values:
        return label, formats
    if keyword == nwKeyWords.TAG_KEY:
        # Only the first value is used for a tag line
        return label + values[0], formats
    return label + ", ".join(values), formats

def _processFragments(
self, par: DocXParagraph, pStyle: str, text: str, tFmt: T_Formats | None = None
) -> None:
Expand Down Expand Up @@ -465,7 +442,7 @@ def _textRunToXml(self, text: str, fmt: int, fClass: str = "") -> ET.Element:
xmlSubElem(rPr, _wTag("u"), attrib={_wTag("val"): "single"})
if fmt & X_MRK:
xmlSubElem(rPr, _wTag("shd"), attrib={
_wTag("fill"): COL_MARK_TXT, _wTag("val"): "clear",
_wTag("fill"): _docXCol(self._theme.highlight), _wTag("val"): "clear",
})
if fmt & X_DEL:
xmlSubElem(rPr, _wTag("strike"))
Expand Down
50 changes: 16 additions & 34 deletions novelwriter/formats/tohtml.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from time import time

from novelwriter.common import formatTimeStamp
from novelwriter.constants import nwHeadFmt, nwHtmlUnicode, nwKeyWords, nwLabels
from novelwriter.constants import nwHeadFmt, nwHtmlUnicode
from novelwriter.core.project import NWProject
from novelwriter.formats.shared import BlockFmt, BlockTyp, T_Formats, TextFmt, stripEscape
from novelwriter.formats.tokenizer import Tokenizer
Expand All @@ -48,6 +48,8 @@
TextFmt.SUP_B: (TextFmt.SUP_E, "<sup>"),
TextFmt.SUB_B: (TextFmt.SUB_E, "<sub>"),
TextFmt.COL_B: (TextFmt.COL_E, "<span style='color: {0}'>"),
TextFmt.ANM_B: (TextFmt.ANM_E, "<a name='{0}'>"),
TextFmt.HRF_B: (TextFmt.HRF_E, "<a href='{0}'>"),
}

# Each closer tag, with the id of its corresponding opener and tag format
Expand All @@ -60,6 +62,8 @@
TextFmt.SUP_E: (TextFmt.SUP_B, "</sup>"),
TextFmt.SUB_E: (TextFmt.SUB_B, "</sub>"),
TextFmt.COL_E: (TextFmt.COL_B, "</span>"),
TextFmt.ANM_E: (TextFmt.ANM_B, "</a>"),
TextFmt.HRF_E: (TextFmt.HRF_B, "</a>"),
}

# Empty HTML tag record
Expand Down Expand Up @@ -155,21 +159,21 @@ def doConvert(self) -> None:
lines = []
tHandle = self._handle

for tType, nHead, tText, tFormat, tStyle in self._blocks:
for tType, nHead, tText, tFmt, tStyle in self._blocks:

# Replace < and > with HTML entities
if tFormat:
if tFmt:
# If we have formatting, we must recompute the locations
cText = []
i = 0
for c in tText:
if c == "<":
cText.append("&lt;")
tFormat = [(p + 3 if p > i else p, f, k) for p, f, k in tFormat]
tFmt = [(p + 3 if p > i else p, f, k) for p, f, k in tFmt]
i += 4
elif c == ">":
cText.append("&gt;")
tFormat = [(p + 3 if p > i else p, f, k) for p, f, k in tFormat]
tFmt = [(p + 3 if p > i else p, f, k) for p, f, k in tFmt]
i += 4
else:
cText.append(c)
Expand Down Expand Up @@ -221,7 +225,7 @@ def doConvert(self) -> None:

# Process Text Type
if tType == BlockTyp.TEXT:
lines.append(f"<p{hStyle}>{self._formatText(tText, tFormat)}</p>\n")
lines.append(f"<p{hStyle}>{self._formatText(tText, tFmt)}</p>\n")

elif tType == BlockTyp.TITLE:
tHead = tText.replace(nwHeadFmt.BR, "<br>")
Expand Down Expand Up @@ -250,13 +254,10 @@ def doConvert(self) -> None:
lines.append(f"<p class='skip'{hStyle}>&nbsp;</p>\n")

elif tType == BlockTyp.COMMENT:
lines.append(f"<p class='comment'>{self._formatText(tText, tFormat)}</p>\n")
lines.append(f"<p class='comment'{hStyle}>{self._formatText(tText, tFmt)}</p>\n")

elif tType == BlockTyp.KEYWORD:
tag, text = self._formatKeywords(tText)
kClass = f" class='meta meta-{tag}'" if tag else ""
tTemp = f"<p{kClass}{hStyle}>{text}</p>\n"
lines.append(tTemp)
lines.append(f"<p class='meta'{hStyle}>{self._formatText(tText, tFmt)}</p>\n")

self._result = "".join(lines)
self._fullHTML.append(self._result)
Expand Down Expand Up @@ -431,9 +432,8 @@ def getStyleSheet(self) -> list[str]:
mScale, mScale
))

styles.append("a {color: rgb(66, 113, 174);}")
styles.append("mark {background: rgb(255, 255, 166);}")
styles.append(".keyword {color: rgb(245, 135, 31); font-weight: bold;}")
styles.append("a {{color: {0:s};}}".format(self._theme.head.name(QtHexRgb)))
styles.append("mark {{background: {0:s};}}".format(self._theme.highlight.name(QtHexRgb)))

return styles

Expand All @@ -455,6 +455,8 @@ def _formatText(self, text: str, tFmt: T_Formats) -> str:
if not state.get(fmt, True):
if fmt == TextFmt.COL_B and (color := self._classes.get(data)):
tags.append((pos, m[1].format(color.name(QtHexRgb))))
elif fmt in (TextFmt.ANM_B, TextFmt.HRF_B):
tags.append((pos, m[1].format(data or "#")))
else:
tags.append((pos, m[1]))
state[fmt] = True
Expand Down Expand Up @@ -488,23 +490,3 @@ def _formatText(self, text: str, tFmt: T_Formats) -> str:
temp = temp.replace("\n", "<br>")

return stripEscape(temp)

def _formatKeywords(self, text: str) -> tuple[str, str]:
    """Apply HTML formatting to keywords.

    The text is scanned with a leading "@" prepended. If the scan is
    not valid, yields no bits, or the keyword has no entry in
    nwLabels.KEY_NAME, a tuple of two empty strings is returned.

    Otherwise the first element is the keyword with its first
    character removed, and the second is the HTML for the line: the
    label in a 'keyword' span, followed by either a named anchor for
    a tag definition (with an optional second part in an 'optional'
    span), or a comma separated list of links to tag anchors.
    """
    valid, bits, _ = self._project.index.scanThis("@"+text)
    if not valid or not bits or bits[0] not in nwLabels.KEY_NAME:
        return "", ""

    result = f"<span class='keyword'>{self._localLookup(nwLabels.KEY_NAME[bits[0]])}:</span> "
    if len(bits) > 1:
        if bits[0] == nwKeyWords.TAG_KEY:
            one, two = self._project.index.parseValue(bits[1])
            result += f"<a class='tag' name='tag_{one}'>{one}</a>"
            if two:
                # The span must be closed by </span>; closing it with
                # </a> produced mismatched, malformed HTML
                result += f" | <span class='optional'>{two}</span>"
        else:
            result += ", ".join(
                f"<a class='tag' href='#tag_{t}'>{t}</a>" for t in bits[1:]
            )

    return bits[0][1:], result
94 changes: 61 additions & 33 deletions novelwriter/formats/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,11 @@ def __init__(self, project: NWProject) -> None:
self._doSynopsis = False # Also process synopsis comments
self._doComments = False # Also process comments
self._doKeywords = False # Also process keywords like tags and references
self._skipKeywords = set() # Keywords to ignore
self._keepBreaks = True # Keep line breaks in paragraphs
self._defaultAlign = "left" # The default text alignment

self._skipKeywords: set[str] = set() # Keywords to ignore

# Other Setting
self._theme = TextDocumentTheme()
self._classes: dict[str, QColor] = {}
Expand Down Expand Up @@ -464,6 +465,9 @@ def initDocument(self) -> None:
self._classes["comment"] = self._theme.comment
self._classes["dialog"] = self._theme.dialog
self._classes["altdialog"] = self._theme.altdialog
self._classes["tag"] = self._theme.tag
self._classes["keyword"] = self._theme.keyword
self._classes["optional"] = self._theme.optional
return

def addRootHeading(self, tHandle: str) -> None:
Expand Down Expand Up @@ -614,7 +618,9 @@ def tokenizeText(self) -> None:
if cStyle in (nwComment.SYNOPSIS, nwComment.SHORT, nwComment.PLAIN):
bStyle = COMMENT_STYLE[cStyle]
tLine, tFmt = self._formatComment(bStyle, cKey, cText)
blocks.append((BlockTyp.COMMENT, nHead, tLine, tFmt, sAlign))
blocks.append((
BlockTyp.COMMENT, nHead, tLine, tFmt, sAlign
))
if self._keepRaw:
tmpMarkdown.append(f"{aLine}\n")

Expand All @@ -633,31 +639,14 @@ def tokenizeText(self) -> None:
if not self._doKeywords:
continue

valid, bits, _ = self._project.index.scanThis(aLine)
if (
valid and bits and bits[0] in nwLabels.KEY_NAME
and bits[0] not in self._skipKeywords
):
tLine, tFmt = self._formatMeta(aLine)
if tLine:
blocks.append((
BlockTyp.KEYWORD, nHead, aLine[1:].strip(), [], sAlign
BlockTyp.KEYWORD, nHead, tLine, tFmt, sAlign
))
if self._keepRaw:
tmpMarkdown.append(f"{aLine}\n")

# valid, bits, _ = self._project.index.scanThis("@"+text)
# if not valid or not bits or bits[0] not in nwLabels.KEY_NAME:
# return "", []

# rTxt = f"{self._localLookup(nwLabels.KEY_NAME[bits[0]])}: "
# rFmt: T_Formats = [(0, TextFmt.B_B, ""), (len(rTxt) - 1, TextFmt.B_E, "")]
# if len(bits) > 1:
# if bits[0] == nwKeyWords.TAG_KEY:
# rTxt += bits[1]
# else:
# rTxt += ", ".join(bits[1:])

# return rTxt, rFmt

elif aLine.startswith(("# ", "#! ")):
# Title or Partition Headings
# ===========================
Expand Down Expand Up @@ -1033,22 +1022,12 @@ def countStats(self) -> None:
allChars += nChars
allWordChars += nWChars

elif tType == BlockTyp.COMMENT:
elif tType in (BlockTyp.COMMENT, BlockTyp.KEYWORD):
words = tText.split()
allWords += len(words)
allChars += len(tText)
allWordChars += len("".join(words))

elif tType == BlockTyp.KEYWORD:
valid, bits, _ = self._project.index.scanThis("@"+tText)
if valid and bits:
key = self._localLookup(nwLabels.KEY_NAME[bits[0]])
text = "{0}: {1}".format(key, ", ".join(bits[1:]))
words = text.split()
allWords += len(words)
allChars += len(text)
allWordChars += len("".join(words))

self._counts["titleCount"] = titleCount
self._counts["paragraphCount"] = paragraphCount

Expand Down Expand Up @@ -1112,6 +1091,55 @@ def _formatComment(self, style: ComStyle, key: str, text: str) -> tuple[str, T_F
rFmt.extend((p + shift, f, d) for p, f, d in tFmt)
return tTxt, rFmt

def _formatMeta(self, text: str) -> tuple[str, T_Formats]:
    """Parse a meta line into display text and a list of formats.

    The line is scanned by the project index. If it is not valid,
    its keyword has no entry in nwLabels.KEY_NAME, or the keyword is
    in the set of skipped keywords, an empty string and an empty
    list are returned.

    Otherwise the text is the label from self._localLookup followed
    by ":" and a space, and then the values. The label is wrapped in
    bold and "keyword" colour formats. For a tag definition, the
    name is wrapped in "tag" colour and anchor name formats, and an
    optional second part is appended after " | " in "optional"
    colour. For all other keywords, each value is wrapped in "tag"
    colour and href formats, and values are separated by ", ".
    """
    valid, bits, _ = self._project.index.scanThis(text)
    if not (valid and bits):
        return "", []

    keyword = bits[0]
    if keyword not in nwLabels.KEY_NAME or keyword in self._skipKeywords:
        return "", []

    label = f"{self._localLookup(nwLabels.KEY_NAME[keyword])}:"
    cursor = len(label)
    parts: list[str] = [label, " "]
    formats: T_Formats = [
        (0, TextFmt.B_B, ""), (0, TextFmt.COL_B, "keyword"),
        (cursor, TextFmt.B_E, ""), (cursor, TextFmt.COL_E, ""),
    ]
    cursor += 1  # Step past the space following the label

    values = bits[1:]
    if not values:
        return "".join(parts), formats

    if keyword == nwKeyWords.TAG_KEY:
        # Tag definition: the name becomes an anchor target
        name, extra = self._project.index.parseValue(values[0])
        stop = cursor + len(name)
        formats.extend([
            (cursor, TextFmt.COL_B, "tag"),
            (cursor, TextFmt.ANM_B, f"tag_{name}".lower()),
            (stop, TextFmt.ANM_E, ""),
            (stop, TextFmt.COL_E, ""),
        ])
        parts.append(name)
        cursor = stop
        if extra:
            parts.append(" | ")
            cursor += 3
            stop = cursor + len(extra)
            formats.extend([
                (cursor, TextFmt.COL_B, "optional"),
                (stop, TextFmt.COL_E, ""),
            ])
            parts.append(extra)
    else:
        # References: each value links to the matching tag anchor
        last = len(values) - 1
        for idx, ref in enumerate(values):
            stop = cursor + len(ref)
            formats.extend([
                (cursor, TextFmt.COL_B, "tag"),
                (cursor, TextFmt.HRF_B, f"#tag_{ref}".lower()),
                (stop, TextFmt.HRF_E, ""),
                (stop, TextFmt.COL_E, ""),
            ])
            parts.append(ref)
            cursor = stop
            if idx < last:
                parts.append(", ")
                cursor += 2

    return "".join(parts), formats

def _extractFormats(
self, text: str, skip: int = 0, hDialog: bool = False
) -> tuple[str, T_Formats]:
Expand Down
21 changes: 3 additions & 18 deletions novelwriter/formats/tomarkdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from pathlib import Path

from novelwriter.constants import nwHeadFmt, nwLabels, nwUnicode
from novelwriter.constants import nwHeadFmt, nwUnicode
from novelwriter.core.project import NWProject
from novelwriter.formats.shared import BlockFmt, BlockTyp, T_Formats, TextFmt
from novelwriter.formats.tokenizer import Tokenizer
Expand Down Expand Up @@ -154,7 +154,8 @@ def doConvert(self) -> None:
lines.append(f"{self._formatText(tText, tFormat, mTags)}\n\n")

elif tType == BlockTyp.KEYWORD:
lines.append(self._formatKeywords(tText, tStyle))
end = " \n" if tStyle & BlockFmt.Z_BTMMRG else "\n\n"
lines.append(f"{self._formatText(tText, tFormat, mTags)}{end}")

self._result = "".join(lines)
self._fullMD.append(self._result)
Expand Down Expand Up @@ -215,19 +216,3 @@ def _formatText(self, text: str, tFmt: T_Formats, tags: dict[TextFmt, str]) -> s
md = tags.get(fmt, "")
temp = f"{temp[:pos]}{md}{temp[pos:]}"
return temp

def _formatKeywords(self, text: str, style: BlockFmt) -> str:
    """Apply Markdown formatting to keywords.

    The text is scanned with a leading "@" prepended. If the scan is
    not valid or yields no bits, an empty string is returned.
    Otherwise, when the keyword has an entry in nwLabels.KEY_NAME,
    the result is the label in bold followed by the comma separated
    values. The line is terminated by a hard line break when the
    style has the BlockFmt.Z_BTMMRG flag set, and by a paragraph
    break otherwise.
    """
    valid, bits, _ = self._project.index.scanThis("@"+text)
    if not valid or not bits:
        return ""

    parts: list[str] = []
    if bits[0] in nwLabels.KEY_NAME:
        parts.append(f"**{self._localLookup(nwLabels.KEY_NAME[bits[0]])}:** ")
        if len(bits) > 1:
            parts.append(", ".join(bits[1:]))

    # A Markdown hard line break needs at least two trailing spaces;
    # a single space before the newline is just a soft break.
    parts.append("  \n" if style & BlockFmt.Z_BTMMRG else "\n\n")

    return "".join(parts)
Loading

0 comments on commit bd5442c

Please sign in to comment.