Skip to content

Commit

Permalink
Text Editor: Replace default markdown-it HTML parser
Browse files Browse the repository at this point in the history
It's not cursed af anymore! I can actually sanitise it how I want! I can actually convert from html elements with specific styles to prosemirror nodes/marks without doing things that are even more cursed!
  • Loading branch information
ChiriVulpes committed Oct 31, 2024
1 parent 1e695b7 commit 63d8576
Show file tree
Hide file tree
Showing 9 changed files with 1,030 additions and 61 deletions.
1 change: 1 addition & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ module.exports = /** @type {import("eslint").Linter.BaseConfig & import("@typesc
"no-inner-declarations": ["off"],
"no-unexpected-multiline": ["off"], // sometimes i want to do zero indexing on a new line
"semi": ["warn", "never"],
"no-cond-assign": ["off"], // i have literally never had a bug due to this before, so loosening this restriction

// typescript-eslint
"@typescript-eslint/no-unused-vars": ["off"], // literally just what typescript already has, no thanks
Expand Down
14 changes: 7 additions & 7 deletions src/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"private": true,
"dependencies": {
"api.fluff4.me": "^1.0.73",
"api.fluff4.me": "^1.0.75",
"prosemirror-example-setup": "1.2.3",
"prosemirror-markdown": "1.13.1",
"prosemirror-state": "1.4.3",
Expand Down
73 changes: 21 additions & 52 deletions src/ui/component/core/TextEditor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ import Objects from "utility/Objects"
import type { UnsubscribeState } from "utility/State"
import State from "utility/State"
import Store from "utility/Store"
import type Strings from "utility/Strings"
import MarkdownItHTML from "utility/string/MarkdownItHTML"
import type Strings from "utility/string/Strings"
import Time from "utility/Time"
import type { PartialRecord } from "utility/Type"
import w3cKeyname from "w3c-keyname"
Expand Down Expand Up @@ -421,7 +422,9 @@ const REGEX_ATTRIBUTE = (() => {

const REGEX_CSS_PROPERTY = /^[-a-zA-Z_][a-zA-Z0-9_-]*$/

const markdown = MarkdownIt("commonmark", { html: true })
const markdown = new MarkdownIt("commonmark", { html: true, breaks: true })
MarkdownItHTML.use(markdown, MarkdownItHTML.Options()
.disallowTags("img", "figure", "figcaption", "map", "area"))
markdown.inline.ruler.enable("strikethrough")
markdown.inline.ruler2.enable("strikethrough")

Expand Down Expand Up @@ -548,10 +551,10 @@ markdown.inline.ruler2.before("emphasis", "underline", function underline_postPr
////////////////////////////////////

interface MarkdownHTMLTokenRemapSpec {
getAttrs: (token: FluffToken) => Attrs | true | undefined
getAttrs: (token: MarkdownItHTML.Token) => Attrs | true | undefined
}

const markdownHTMLRegistry: PartialRecord<Nodes, MarkdownHTMLTokenRemapSpec> = {
const markdownHTMLNodeRegistry: PartialRecord<Nodes, MarkdownHTMLTokenRemapSpec> = {
text_align: {
getAttrs: token => {
const align = token.style?.get("text-align")
Expand All @@ -563,13 +566,10 @@ const markdownHTMLRegistry: PartialRecord<Nodes, MarkdownHTMLTokenRemapSpec> = {
},
}

const decodeHTMLEntities = (text: string) =>
new DOMParser().parseFromString(text, "text/html").body.textContent ?? ""
// const markdownHTMLMarkRegistry: PartialRecord<Marks, MarkdownHTMLTokenRemapSpec> = {
// }

interface FluffToken extends Token {
depth: number
skipped?: true
style?: Map<string, string>
interface FluffToken extends MarkdownItHTML.Token {
nodeAttrs?: Attrs
}

Expand All @@ -578,58 +578,28 @@ markdown.parse = (src, env) => {
const rawTokens = originalParse.call(markdown, src, env) as FluffToken[]

const tokens: FluffToken[] = []
// the `depth` of the parent `_open` token
let depth = 0
// the `level` of the parent `_open` token
let level = 0
for (const token of rawTokens) {
if (token.type !== "html_block") {
token.depth = token.nesting === -1 ? depth : depth + 1
depth += token.nesting
if (token.type !== "html_block_open" && token.type !== "html_block_close") {
tokens.push(token)
continue
}

let tag = token.content.trim()
if (!tag.startsWith("<") || !tag.endsWith(">")) {
console.warn("Invalid HTML in markdown:", tag)
token.skipped = true
continue
}

tag = tag.slice(1, -1)
const closing = tag.startsWith("/")
token.nesting = closing ? -1 : 1

const attrsStartIndex = tag.indexOf(" ") + 1
const type = !attrsStartIndex ? tag : tag.slice(0, attrsStartIndex - 1)
if (attrsStartIndex && !closing) {
const attrString = tag.slice(attrsStartIndex)

token.attrs = [...attrString.matchAll(REGEX_ATTRIBUTE)]
.map(([, attribute, value]) => {
value = value.startsWith("'") || value.startsWith('"') ? value.slice(1, -1) : value
return [attribute.toLowerCase(), decodeHTMLEntities(value)] as const
})

token.style = parseStyleAttributeValue(token.attrGet("style"))
}

token.content = type
if (closing) {
const opening = tokens.findLast(token => token.depth === depth)
if (token.nesting < 0) {
const opening = tokens.findLast(token => token.level === level)
if (!opening) {
console.warn("Invalid HTML in markdown:", tag)
token.skipped = true
console.warn("Invalid HTML in markdown:", token.raw)
continue
}

token.type = `${opening.type.slice(0, -5)}_close`
token.depth = depth
tokens.push(token)
depth += token.nesting
level = token.level
continue
}

for (const [nodeType, spec] of Object.entries(markdownHTMLRegistry)) {
for (const [nodeType, spec] of Object.entries(markdownHTMLNodeRegistry)) {
const attrs = spec.getAttrs(token)
if (attrs) {
token.type = nodeType
Expand All @@ -640,8 +610,7 @@ markdown.parse = (src, env) => {
}

token.type = `${token.type}_open`
depth += token.nesting
token.depth = depth
level = token.level
tokens.push(token)
}

Expand All @@ -659,7 +628,7 @@ const markdownParser = new MarkdownParser(schema, markdown, Objects.filterNullis
mark: "strikethrough",
},

...Object.entries(markdownHTMLRegistry)
...Object.entries(markdownHTMLNodeRegistry)
.toObject(([tokenType, spec]) => [tokenType, ({
block: tokenType,
getAttrs: (token) => (token as FluffToken).nodeAttrs ?? {},
Expand All @@ -670,7 +639,7 @@ const markdownSerializer = new MarkdownSerializer(
{
...defaultMarkdownSerializer.nodes,
text_align: (state, node, parent, index) => {
state.write(`<div style="text-align:${node.attrs.align}">\n\n`)
state.write(`<div style="text-align:${node.attrs.align}">\n`)
state.renderContent(node)
state.write("</div>")
state.closeBlock(node)
Expand Down
32 changes: 32 additions & 0 deletions src/ui/view/HomeView.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import MarkdownIt from "markdown-it"
import Component from "ui/Component"
import Block from "ui/component/core/Block"
import Form from "ui/component/core/Form"
import LabelledTable from "ui/component/core/LabelledTable"
import TextEditor from "ui/component/core/TextEditor"
import View from "ui/view/View"
import ViewDefinition from "ui/view/ViewDefinition"
import MarkdownItHTML from "utility/string/MarkdownItHTML"

export default ViewDefinition({
create: () => {
Expand All @@ -12,6 +15,35 @@ export default ViewDefinition({
const block = Block().appendTo(view)
const form = block.and(Form, block.title)

// Scratch markdown playground: whatever is typed into the plain-text box below is
// parsed and rendered into `output` on every `input` event.
// The renderer is configured once here and reused by the handler, instead of being
// rebuilt (and the HTML plugin re-registered) on every keystroke.
const playgroundMarkdown = new MarkdownIt("commonmark", { html: true, breaks: true })
MarkdownItHTML.use(playgroundMarkdown, MarkdownItHTML.Options()
	.disallowTags("img", "figure", "figcaption", "map", "area"))

const output = Component("div")
Component("div")
	.attributes.set("contenteditable", "plaintext-only")
	.style.setProperty("white-space", "pre-wrap")
	.style.setProperty("font", "inherit")
	.style.setProperty("background", "#222")
	.style.setProperty("width", "100%")
	.style.setProperty("height", "400px")
	.style.setProperty("padding", "0.5em")
	.style.setProperty("box-sizing", "border-box")
	.event.subscribe("input", event => {
		const text = event.component.element.textContent ?? ""
		// Log the token stream so the parser output can be inspected in the console.
		console.log(playgroundMarkdown.parse(text, {}))
		// NOTE(review): the rendered HTML is assigned via innerHTML, so what reaches the DOM
		// depends on how MarkdownItHTML handles raw HTML in the input; confirm before
		// feeding this anything other than locally typed text.
		output.element.innerHTML = playgroundMarkdown.render(text)
	})
	.appendTo(form.content)

// The rendered preview sits directly below the input box.
output
	.style.setProperty("font", "inherit")
	.style.setProperty("background", "#222")
	.style.setProperty("width", "100%")
	.style.setProperty("padding", "0.5em")
	.style.setProperty("margin-top", "1em")
	.style.setProperty("box-sizing", "border-box")
	.appendTo(form.content)

const table = LabelledTable().appendTo(form.content)

table.label(label => label.text.set("test editor"))
Expand Down
13 changes: 13 additions & 0 deletions src/utility/Arrays.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ declare global {
findMap<RETURN> (predicate: (value: T, index: number, obj: T[]) => boolean, mapper: (value: T, index: number, obj: T[]) => RETURN): RETURN | undefined

groupBy<GROUP> (grouper: (value: T, index: number, obj: T[]) => GROUP): [GROUP, T[]][]

/** Filters the contents of this array itself; typed with the signature of `Array.prototype.filter`. */
filterInPlace: Array<T>["filter"]
/** Maps the contents of this array itself; typed with the signature of `Array.prototype.map` (was mistakenly `filter`). */
mapInPlace: Array<T>["map"]
}
}

Expand Down Expand Up @@ -298,6 +301,16 @@ namespace Arrays {

return Object.entries(result)
})

/**
 * `Array.prototype.filterInPlace`: replaces the contents of the array with only the
 * elements the predicate accepts, then returns that same (now filtered) array.
 *
 * `splice` returns the elements it removed, i.e. the original unfiltered contents,
 * so its result is deliberately not what gets returned here.
 */
Define(Array.prototype, "filterInPlace", function (filter): any[] {
	// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
	this.splice(0, Infinity, ...this.filter(filter))
	return this
})

/**
 * `Array.prototype.mapInPlace`: replaces every element of the array with the mapper's
 * result for it, then returns that same (now mapped) array.
 *
 * `splice` returns the elements it removed, i.e. the original unmapped contents,
 * so its result is deliberately not what gets returned here.
 */
Define(Array.prototype, "mapInPlace", function (mapper): any[] {
	// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
	this.splice(0, Infinity, ...this.map(mapper))
	return this
})
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/utility/Time.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import Strings from "utility/Strings"
import Strings from "utility/string/Strings"

namespace Time {
export type ISO = `${bigint}-${bigint}-${bigint}T${bigint}:${bigint}:${number}Z`
Expand Down
Loading

0 comments on commit 63d8576

Please sign in to comment.