Text Editor: Replace default markdown-it HTML parser

The HTML handling is no longer a hack: markup can now be sanitised exactly as needed, and HTML elements with specific styles can be converted to ProseMirror nodes/marks without resorting to even hackier workarounds.
fluff4me · Oct 31, 2024 · 63d8576 · 63d8576
1 parent 1e695b7
commit 63d8576
Show file tree

Hide file tree

Showing 9 changed files with 1,030 additions and 61 deletions.
diff --git a/.eslintrc.js b/.eslintrc.js
@@ -26,6 +26,7 @@ module.exports = /** @type {import("eslint").Linter.BaseConfig & import("@typesc
 		"no-inner-declarations": ["off"],
 		"no-unexpected-multiline": ["off"], // sometimes i want to do zero indexing on a new line
 		"semi": ["warn", "never"],
+		"no-cond-assign": ["off"], // i have literally never had a bug due to this before, so loosening this restriction
 
 		// typescript-eslint
 		"@typescript-eslint/no-unused-vars": ["off"], // literally just what typescript already has, no thanks

diff --git a/src/package-lock.json b/src/package-lock.json
diff --git a/src/package.json b/src/package.json
@@ -1,7 +1,7 @@
 {
 	"private": true,
 	"dependencies": {
-		"api.fluff4.me": "^1.0.73",
+		"api.fluff4.me": "^1.0.75",
 		"prosemirror-example-setup": "1.2.3",
 		"prosemirror-markdown": "1.13.1",
 		"prosemirror-state": "1.4.3",

diff --git a/src/ui/component/core/TextEditor.ts b/src/ui/component/core/TextEditor.ts
@@ -38,7 +38,8 @@ import Objects from "utility/Objects"
 import type { UnsubscribeState } from "utility/State"
 import State from "utility/State"
 import Store from "utility/Store"
-import type Strings from "utility/Strings"
+import MarkdownItHTML from "utility/string/MarkdownItHTML"
+import type Strings from "utility/string/Strings"
 import Time from "utility/Time"
 import type { PartialRecord } from "utility/Type"
 import w3cKeyname from "w3c-keyname"
@@ -421,7 +422,9 @@ const REGEX_ATTRIBUTE = (() => {
 
 const REGEX_CSS_PROPERTY = /^[-a-zA-Z_][a-zA-Z0-9_-]*$/
 
-const markdown = MarkdownIt("commonmark", { html: true })
+const markdown = new MarkdownIt("commonmark", { html: true, breaks: true })
+MarkdownItHTML.use(markdown, MarkdownItHTML.Options()
+	.disallowTags("img", "figure", "figcaption", "map", "area"))
 markdown.inline.ruler.enable("strikethrough")
 markdown.inline.ruler2.enable("strikethrough")
 
@@ -548,10 +551,10 @@ markdown.inline.ruler2.before("emphasis", "underline", function underline_postPr
 ////////////////////////////////////
 
 interface MarkdownHTMLTokenRemapSpec {
-	getAttrs: (token: FluffToken) => Attrs | true | undefined
+	getAttrs: (token: MarkdownItHTML.Token) => Attrs | true | undefined
 }
 
-const markdownHTMLRegistry: PartialRecord<Nodes, MarkdownHTMLTokenRemapSpec> = {
+const markdownHTMLNodeRegistry: PartialRecord<Nodes, MarkdownHTMLTokenRemapSpec> = {
 	text_align: {
 		getAttrs: token => {
 			const align = token.style?.get("text-align")
@@ -563,13 +566,10 @@ const markdownHTMLRegistry: PartialRecord<Nodes, MarkdownHTMLTokenRemapSpec> = {
 	},
 }
 
-const decodeHTMLEntities = (text: string) =>
-	new DOMParser().parseFromString(text, "text/html").body.textContent ?? ""
+// const markdownHTMLMarkRegistry: PartialRecord<Marks, MarkdownHTMLTokenRemapSpec> = {
+// }
 
-interface FluffToken extends Token {
-	depth: number
-	skipped?: true
-	style?: Map<string, string>
+interface FluffToken extends MarkdownItHTML.Token {
 	nodeAttrs?: Attrs
 }
 
@@ -578,58 +578,28 @@ markdown.parse = (src, env) => {
 	const rawTokens = originalParse.call(markdown, src, env) as FluffToken[]
 
 	const tokens: FluffToken[] = []
-	// the `depth` of the parent `_open` token
-	let depth = 0
+	// the `level` of the parent `_open` token
+	let level = 0
 	for (const token of rawTokens) {
-		if (token.type !== "html_block") {
-			token.depth = token.nesting === -1 ? depth : depth + 1
-			depth += token.nesting
+		if (token.type !== "html_block_open" && token.type !== "html_block_close") {
 			tokens.push(token)
 			continue
 		}
 
-		let tag = token.content.trim()
-		if (!tag.startsWith("<") || !tag.endsWith(">")) {
-			console.warn("Invalid HTML in markdown:", tag)
-			token.skipped = true
-			continue
-		}
-
-		tag = tag.slice(1, -1)
-		const closing = tag.startsWith("/")
-		token.nesting = closing ? -1 : 1
-
-		const attrsStartIndex = tag.indexOf(" ") + 1
-		const type = !attrsStartIndex ? tag : tag.slice(0, attrsStartIndex - 1)
-		if (attrsStartIndex && !closing) {
-			const attrString = tag.slice(attrsStartIndex)
-
-			token.attrs = [...attrString.matchAll(REGEX_ATTRIBUTE)]
-				.map(([, attribute, value]) => {
-					value = value.startsWith("'") || value.startsWith('"') ? value.slice(1, -1) : value
-					return [attribute.toLowerCase(), decodeHTMLEntities(value)] as const
-				})
-
-			token.style = parseStyleAttributeValue(token.attrGet("style"))
-		}
-
-		token.content = type
-		if (closing) {
-			const opening = tokens.findLast(token => token.depth === depth)
+		if (token.nesting < 0) {
+			const opening = tokens.findLast(token => token.level === level)
 			if (!opening) {
-				console.warn("Invalid HTML in markdown:", tag)
-				token.skipped = true
+				console.warn("Invalid HTML in markdown:", token.raw)
 				continue
 			}
 
 			token.type = `${opening.type.slice(0, -5)}_close`
-			token.depth = depth
 			tokens.push(token)
-			depth += token.nesting
+			level = token.level
 			continue
 		}
 
-		for (const [nodeType, spec] of Object.entries(markdownHTMLRegistry)) {
+		for (const [nodeType, spec] of Object.entries(markdownHTMLNodeRegistry)) {
 			const attrs = spec.getAttrs(token)
 			if (attrs) {
 				token.type = nodeType
@@ -640,8 +610,7 @@ markdown.parse = (src, env) => {
 		}
 
 		token.type = `${token.type}_open`
-		depth += token.nesting
-		token.depth = depth
+		level = token.level
 		tokens.push(token)
 	}
 
@@ -659,7 +628,7 @@ const markdownParser = new MarkdownParser(schema, markdown, Objects.filterNullis
 		mark: "strikethrough",
 	},
 
-	...Object.entries(markdownHTMLRegistry)
+	...Object.entries(markdownHTMLNodeRegistry)
 		.toObject(([tokenType, spec]) => [tokenType, ({
 			block: tokenType,
 			getAttrs: (token) => (token as FluffToken).nodeAttrs ?? {},
@@ -670,7 +639,7 @@ const markdownSerializer = new MarkdownSerializer(
 	{
 		...defaultMarkdownSerializer.nodes,
 		text_align: (state, node, parent, index) => {
-			state.write(`<div style="text-align:${node.attrs.align}">\n\n`)
+			state.write(`<div style="text-align:${node.attrs.align}">\n`)
 			state.renderContent(node)
 			state.write("</div>")
 			state.closeBlock(node)

diff --git a/src/ui/view/HomeView.ts b/src/ui/view/HomeView.ts
@@ -1,9 +1,12 @@
+import MarkdownIt from "markdown-it"
+import Component from "ui/Component"
 import Block from "ui/component/core/Block"
 import Form from "ui/component/core/Form"
 import LabelledTable from "ui/component/core/LabelledTable"
 import TextEditor from "ui/component/core/TextEditor"
 import View from "ui/view/View"
 import ViewDefinition from "ui/view/ViewDefinition"
+import MarkdownItHTML from "utility/string/MarkdownItHTML"
 
 export default ViewDefinition({
 	create: () => {
@@ -12,6 +15,35 @@ export default ViewDefinition({
 		const block = Block().appendTo(view)
 		const form = block.and(Form, block.title)
 
+		const output = Component("div")
+		Component("div")
+			.attributes.set("contenteditable", "plaintext-only")
+			.style.setProperty("white-space", "pre-wrap")
+			.style.setProperty("font", "inherit")
+			.style.setProperty("background", "#222")
+			.style.setProperty("width", "100%")
+			.style.setProperty("height", "400px")
+			.style.setProperty("padding", "0.5em")
+			.style.setProperty("box-sizing", "border-box")
+			.event.subscribe("input", event => {
+				const text = event.component.element.textContent ?? ""
+				const md = new MarkdownIt("commonmark", { html: true, breaks: true })
+				MarkdownItHTML.use(md, MarkdownItHTML.Options()
+					.disallowTags("img", "figure", "figcaption", "map", "area"))
+				console.log(md.parse(text, {}))
+				output.element.innerHTML = md.render(text)
+			})
+			.appendTo(form.content)
+
+		output
+			.style.setProperty("font", "inherit")
+			.style.setProperty("background", "#222")
+			.style.setProperty("width", "100%")
+			.style.setProperty("padding", "0.5em")
+			.style.setProperty("margin-top", "1em")
+			.style.setProperty("box-sizing", "border-box")
+			.appendTo(form.content)
+
 		const table = LabelledTable().appendTo(form.content)
 
 		table.label(label => label.text.set("test editor"))

diff --git a/src/utility/Arrays.ts b/src/utility/Arrays.ts
@@ -66,6 +66,9 @@ declare global {
 		findMap<RETURN> (predicate: (value: T, index: number, obj: T[]) => boolean, mapper: (value: T, index: number, obj: T[]) => RETURN): RETURN | undefined
 
 		groupBy<GROUP> (grouper: (value: T, index: number, obj: T[]) => GROUP): [GROUP, T[]][]
+
+		/** Filters this array in place: elements rejected by the predicate are removed from the array itself. */
+		filterInPlace: Array<T>["filter"]
+		/** Maps this array in place: every element is replaced by the mapper's result for it. */
+		mapInPlace: Array<T>["map"]
 	}
 }
 
@@ -298,6 +301,16 @@ namespace Arrays {
 
 			return Object.entries(result)
 		})
+
+		// Filters the array in place and returns the (now filtered) array itself.
+		Define(Array.prototype, "filterInPlace", function (filter): any[] {
+			// `splice` returns the elements it REMOVED (the entire previous contents),
+			// so its result must not be returned — return the mutated array instead
+			// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+			this.splice(0, Infinity, ...this.filter(filter))
+			return this
+		})
+
+		// Maps the array in place and returns the (now mapped) array itself.
+		Define(Array.prototype, "mapInPlace", function (mapper): any[] {
+			// `splice` returns the elements it REMOVED (the pre-mapping contents),
+			// so its result must not be returned — return the mutated array instead
+			// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+			this.splice(0, Infinity, ...this.map(mapper))
+			return this
+		})
 	}
 }
 

diff --git a/src/utility/Time.ts b/src/utility/Time.ts
@@ -1,4 +1,4 @@
-import Strings from "utility/Strings"
+import Strings from "utility/string/Strings"
 
 namespace Time {
 	export type ISO = `${bigint}-${bigint}-${bigint}T${bigint}:${bigint}:${number}Z`