Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Commit

Permalink
Merge pull request #5515 from rda0/maths-parsing-latex
Browse files Browse the repository at this point in the history
Use LaTeX and TeX delimiters by default
  • Loading branch information
jryans authored Apr 9, 2021
2 parents cf06948 + 31e85ec commit ae2082b
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 26 deletions.
8 changes: 4 additions & 4 deletions src/editor/deserialize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,11 @@ function parseElement(n: HTMLElement, partCreator: PartCreator, lastNode: HTMLEl
// math nodes are translated back into delimited latex strings
if (n.hasAttribute("data-mx-maths")) {
const delimLeft = (n.nodeName == "SPAN") ?
(SdkConfig.get()['latex_maths_delims'] || {})['inline_left'] || "$" :
(SdkConfig.get()['latex_maths_delims'] || {})['display_left'] || "$$";
((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['left'] || "\\(" :
((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['left'] || "\\[";
const delimRight = (n.nodeName == "SPAN") ?
(SdkConfig.get()['latex_maths_delims'] || {})['inline_right'] || "$" :
(SdkConfig.get()['latex_maths_delims'] || {})['display_right'] || "$$";
((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['right'] || "\\)" :
((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['right'] || "\\]";
const tex = n.getAttribute("data-mx-maths");
return partCreator.plain(delimLeft + tex + delimRight);
} else if (!checkDescendInto(n)) {
Expand Down
102 changes: 80 additions & 22 deletions src/editor/serialize.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,65 @@ export function mdSerialize(model: EditorModel) {

export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}) {
let md = mdSerialize(model);
// copy of raw input to remove unwanted math later
const orig = md;

if (SettingsStore.getValue("feature_latex_maths")) {
const displayPattern = (SdkConfig.get()['latex_maths_delims'] || {})['display_pattern'] ||
"\\$\\$(([^$]|\\\\\\$)*)\\$\\$";
const inlinePattern = (SdkConfig.get()['latex_maths_delims'] || {})['inline_pattern'] ||
"\\$(([^$]|\\\\\\$)*)\\$";

md = md.replace(RegExp(displayPattern, "gm"), function(m, p1) {
const p1e = AllHtmlEntities.encode(p1);
return `<div data-mx-maths="${p1e}">\n\n</div>\n\n`;
});

md = md.replace(RegExp(inlinePattern, "gm"), function(m, p1) {
const p1e = AllHtmlEntities.encode(p1);
return `<span data-mx-maths="${p1e}"></span>`;
const patternNames = ['tex', 'latex'];
const patternTypes = ['display', 'inline'];
const patternDefaults = {
"tex": {
// detect math with tex delimiters, inline: $...$, display $$...$$
// preferably use negative lookbehinds, not supported in all major browsers:
// const displayPattern = "^(?<!\\\\)\\$\\$(?![ \\t])(([^$]|\\\\\\$)+?)\\$\\$$";
// const inlinePattern = "(?:^|\\s)(?<!\\\\)\\$(?!\\s)(([^$]|\\\\\\$)+?)(?<!\\\\|\\s)\\$";

// conditions for display math detection $$...$$:
// - pattern starts at beginning of line or is not prefixed with backslash or dollar
// - left delimiter ($$) is not escaped by backslash
"display": "(^|[^\\\\$])\\$\\$(([^$]|\\\\\\$)+?)\\$\\$",

// conditions for inline math detection $...$:
// - pattern starts at beginning of line, follows whitespace character or punctuation
// - pattern is on a single line
// - left and right delimiters ($) are not escaped by backslashes
// - left delimiter is not followed by whitespace character
// - right delimiter is not prefixed with whitespace character
"inline":
"(^|\\s|[.,!?:;])(?!\\\\)\\$(?!\\s)(([^$\\n]|\\\\\\$)*([^\\\\\\s\\$]|\\\\\\$)(?:\\\\\\$)?)\\$",
},
"latex": {
// detect math with latex delimiters, inline: \(...\), display \[...\]

// conditions for display math detection \[...\]:
// - pattern starts at beginning of line or is not prefixed with backslash
// - pattern is not empty
"display": "(^|[^\\\\])\\\\\\[(?!\\\\\\])(.*?)\\\\\\]",

// conditions for inline math detection \(...\):
// - pattern starts at beginning of line or is not prefixed with backslash
// - pattern is not empty
"inline": "(^|[^\\\\])\\\\\\((?!\\\\\\))(.*?)\\\\\\)",
},
};

patternNames.forEach(function(patternName) {
patternTypes.forEach(function(patternType) {
// get the regex replace pattern from config or use the default
const pattern = (((SdkConfig.get()["latex_maths_delims"] ||
{})[patternType] || {})["pattern"] || {})[patternName] ||
patternDefaults[patternName][patternType];

md = md.replace(RegExp(pattern, "gms"), function(m, p1, p2) {
const p2e = AllHtmlEntities.encode(p2);
switch (patternType) {
case "display":
return `${p1}<div data-mx-maths="${p2e}">\n\n</div>\n\n`;
case "inline":
return `${p1}<span data-mx-maths="${p2e}"></span>`;
}
});
});
});

// make sure div tags always start on a new line, otherwise it will confuse
Expand All @@ -73,15 +117,29 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
if (!parser.isPlainText() || forceHTML) {
// feed Markdown output to HTML parser
const phtml = cheerio.load(parser.toHTML(),
{ _useHtmlParser2: true, decodeEntities: false })

// add fallback output for latex math, which should not be interpreted as markdown
phtml('div, span').each(function(i, e) {
const tex = phtml(e).attr('data-mx-maths')
if (tex) {
phtml(e).html(`<code>${tex}</code>`)
}
});
{ _useHtmlParser2: true, decodeEntities: false });

if (SettingsStore.getValue("feature_latex_maths")) {
// original Markdown without LaTeX replacements
const parserOrig = new Markdown(orig);
const phtmlOrig = cheerio.load(parserOrig.toHTML(),
{ _useHtmlParser2: true, decodeEntities: false });

// since maths delimiters are handled before Markdown,
// code blocks could contain mangled content.
// replace code blocks with original content
phtmlOrig('code').each(function(i) {
phtml('code').eq(i).text(phtmlOrig('code').eq(i).text());
});

// add fallback output for latex math, which should not be interpreted as markdown
phtml('div, span').each(function(i, e) {
const tex = phtml(e).attr('data-mx-maths')
if (tex) {
phtml(e).html(`<code>${tex}</code>`)
}
});
}
return phtml.html();
}
// ensure removal of escape backslashes in non-Markdown messages
Expand Down

0 comments on commit ae2082b

Please sign in to comment.