From 63d8576c8d1f6951610476873ab46d2b316e169e Mon Sep 17 00:00:00 2001 From: Chiri Vulpes Date: Thu, 31 Oct 2024 14:23:58 +1300 Subject: [PATCH] Text Editor: Replace default markdown-it HTML parser It's not cursed af anymore! I can actually sanitise it how I want! I can actually convert from html elements with specific styles to prosemirror nodes/marks without doing things that are even more cursed! --- .eslintrc.js | 1 + src/package-lock.json | 14 +- src/package.json | 2 +- src/ui/component/core/TextEditor.ts | 73 +- src/ui/view/HomeView.ts | 32 + src/utility/Arrays.ts | 13 + src/utility/Time.ts | 2 +- src/utility/string/MarkdownItHTML.ts | 954 +++++++++++++++++++++++++++ src/utility/{ => string}/Strings.ts | 0 9 files changed, 1030 insertions(+), 61 deletions(-) create mode 100644 src/utility/string/MarkdownItHTML.ts rename src/utility/{ => string}/Strings.ts (100%) diff --git a/.eslintrc.js b/.eslintrc.js index 4f1bfa9..a91a756 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -26,6 +26,7 @@ module.exports = /** @type {import("eslint").Linter.BaseConfig & import("@typesc "no-inner-declarations": ["off"], "no-unexpected-multiline": ["off"], // sometimes i want to do zero indexing on a new line "semi": ["warn", "never"], + "no-cond-assign": ["off"], // i have literally never had a bug due to this before, so loosening this restriction // typescript-eslint "@typescript-eslint/no-unused-vars": ["off"], // literally just what typescript already has, no thanks diff --git a/src/package-lock.json b/src/package-lock.json index 4048de6..799b856 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "api.fluff4.me": "^1.0.73", + "api.fluff4.me": "^1.0.75", "prosemirror-example-setup": "1.2.3", "prosemirror-markdown": "1.13.1", "prosemirror-state": "1.4.3", @@ -32,9 +32,9 @@ "integrity": "sha512-RGdgjQUZba5p6QEFAVx2OGb8rQDL/cPRG7GiedRzMcJ1tYnUANBncjbSB1NRGwbvjcPeikRABz2nshyPk1bhWg==" }, "node_modules/api.fluff4.me": { - "version": "1.0.73", - "resolved": "https://registry.npmjs.org/api.fluff4.me/-/api.fluff4.me-1.0.73.tgz", - "integrity": "sha512-Kgr5NRdRO0ES6DQfmpTpAO7X9Pv+8aCh84MY7+KiKlqDeiCzta4Cuge+o+8tzJGQoQS4AJ8Cp1CSE4QxUG0ZRQ==" + "version": "1.0.75", + "resolved": "https://registry.npmjs.org/api.fluff4.me/-/api.fluff4.me-1.0.75.tgz", + "integrity": "sha512-/Zlw9k3psUdpzkc0Jxc0C1W3K5KEtQTPKaoyAwWBbXJYMmfFE58+AXJuWDgz97jDSkBt9sWh0yLzxi04umIW3A==" }, "node_modules/argparse": { "version": "2.0.1", @@ -279,9 +279,9 @@ "integrity": "sha512-RGdgjQUZba5p6QEFAVx2OGb8rQDL/cPRG7GiedRzMcJ1tYnUANBncjbSB1NRGwbvjcPeikRABz2nshyPk1bhWg==" }, "api.fluff4.me": { - "version": "1.0.73", - "resolved": "https://registry.npmjs.org/api.fluff4.me/-/api.fluff4.me-1.0.73.tgz", - "integrity": "sha512-Kgr5NRdRO0ES6DQfmpTpAO7X9Pv+8aCh84MY7+KiKlqDeiCzta4Cuge+o+8tzJGQoQS4AJ8Cp1CSE4QxUG0ZRQ==" + "version": "1.0.75", + "resolved": "https://registry.npmjs.org/api.fluff4.me/-/api.fluff4.me-1.0.75.tgz", + "integrity": "sha512-/Zlw9k3psUdpzkc0Jxc0C1W3K5KEtQTPKaoyAwWBbXJYMmfFE58+AXJuWDgz97jDSkBt9sWh0yLzxi04umIW3A==" }, "argparse": { "version": "2.0.1", diff --git a/src/package.json b/src/package.json index 8a5570b..152e964 100644 --- a/src/package.json +++ b/src/package.json @@ -1,7 +1,7 @@ { "private": true, "dependencies": { - "api.fluff4.me": "^1.0.73", + "api.fluff4.me": "^1.0.75", "prosemirror-example-setup": "1.2.3", "prosemirror-markdown": "1.13.1", "prosemirror-state": "1.4.3", diff --git a/src/ui/component/core/TextEditor.ts b/src/ui/component/core/TextEditor.ts index 477d351..16d9866 100644 --- a/src/ui/component/core/TextEditor.ts +++ b/src/ui/component/core/TextEditor.ts @@ -38,7 +38,8 @@ import Objects from "utility/Objects" import type { UnsubscribeState } from "utility/State" import State from "utility/State" import Store from "utility/Store" -import type Strings from "utility/Strings" +import MarkdownItHTML from "utility/string/MarkdownItHTML" +import type Strings from "utility/string/Strings" import Time from "utility/Time" import type { PartialRecord } from "utility/Type" import w3cKeyname from "w3c-keyname" @@ -421,7 +422,9 @@ const REGEX_ATTRIBUTE = (() => { const REGEX_CSS_PROPERTY = /^[-a-zA-Z_][a-zA-Z0-9_-]*$/ -const markdown = MarkdownIt("commonmark", { html: true }) +const markdown = new MarkdownIt("commonmark", { html: true, breaks: true }) +MarkdownItHTML.use(markdown, MarkdownItHTML.Options() + .disallowTags("img", "figure", "figcaption", "map", "area")) markdown.inline.ruler.enable("strikethrough") markdown.inline.ruler2.enable("strikethrough") @@ -548,10 +551,10 @@ markdown.inline.ruler2.before("emphasis", "underline", function underline_postPr //////////////////////////////////// interface MarkdownHTMLTokenRemapSpec { - getAttrs: (token: FluffToken) => Attrs | true | undefined + getAttrs: (token: MarkdownItHTML.Token) => Attrs | true | undefined } -const markdownHTMLRegistry: PartialRecord = { +const markdownHTMLNodeRegistry: PartialRecord = { text_align: { getAttrs: token => { const align = token.style?.get("text-align") @@ -563,13 +566,10 @@ const markdownHTMLRegistry: PartialRecord = { }, } -const decodeHTMLEntities = (text: string) => - new DOMParser().parseFromString(text, "text/html").body.textContent ?? "" +// const markdownHTMLMarkRegistry: PartialRecord = { +// } -interface FluffToken extends Token { - depth: number - skipped?: true - style?: Map +interface FluffToken extends MarkdownItHTML.Token { nodeAttrs?: Attrs } @@ -578,58 +578,28 @@ markdown.parse = (src, env) => { const rawTokens = originalParse.call(markdown, src, env) as FluffToken[] const tokens: FluffToken[] = [] - // the `depth` of the parent `_open` token - let depth = 0 + // the `level` of the parent `_open` token + let level = 0 for (const token of rawTokens) { - if (token.type !== "html_block") { - token.depth = token.nesting === -1 ? depth : depth + 1 - depth += token.nesting + if (token.type !== "html_block_open" && token.type !== "html_block_close") { tokens.push(token) continue } - let tag = token.content.trim() - if (!tag.startsWith("<") || !tag.endsWith(">")) { - console.warn("Invalid HTML in markdown:", tag) - token.skipped = true - continue - } - - tag = tag.slice(1, -1) - const closing = tag.startsWith("/") - token.nesting = closing ? -1 : 1 - - const attrsStartIndex = tag.indexOf(" ") + 1 - const type = !attrsStartIndex ? tag : tag.slice(0, attrsStartIndex - 1) - if (attrsStartIndex && !closing) { - const attrString = tag.slice(attrsStartIndex) - - token.attrs = [...attrString.matchAll(REGEX_ATTRIBUTE)] - .map(([, attribute, value]) => { - value = value.startsWith("'") || value.startsWith('"') ? value.slice(1, -1) : value - return [attribute.toLowerCase(), decodeHTMLEntities(value)] as const - }) - - token.style = parseStyleAttributeValue(token.attrGet("style")) - } - - token.content = type - if (closing) { - const opening = tokens.findLast(token => token.depth === depth) + if (token.nesting < 0) { + const opening = tokens.findLast(token => token.level === level) if (!opening) { - console.warn("Invalid HTML in markdown:", tag) - token.skipped = true + console.warn("Invalid HTML in markdown:", token.raw) continue } token.type = `${opening.type.slice(0, -5)}_close` - token.depth = depth tokens.push(token) - depth += token.nesting + level = token.level continue } - for (const [nodeType, spec] of Object.entries(markdownHTMLRegistry)) { + for (const [nodeType, spec] of Object.entries(markdownHTMLNodeRegistry)) { const attrs = spec.getAttrs(token) if (attrs) { token.type = nodeType @@ -640,8 +610,7 @@ markdown.parse = (src, env) => { } token.type = `${token.type}_open` - depth += token.nesting - token.depth = depth + level = token.level tokens.push(token) } @@ -659,7 +628,7 @@ const markdownParser = new MarkdownParser(schema, markdown, Objects.filterNullis mark: "strikethrough", }, - ...Object.entries(markdownHTMLRegistry) + ...Object.entries(markdownHTMLNodeRegistry) .toObject(([tokenType, spec]) => [tokenType, ({ block: tokenType, getAttrs: (token) => (token as FluffToken).nodeAttrs ?? {}, @@ -670,7 +639,7 @@ const markdownSerializer = new MarkdownSerializer( { ...defaultMarkdownSerializer.nodes, text_align: (state, node, parent, index) => { - state.write(`
\n\n`) + state.write(`
\n`) state.renderContent(node) state.write("
") state.closeBlock(node) diff --git a/src/ui/view/HomeView.ts b/src/ui/view/HomeView.ts index 2e7c484..fc86898 100644 --- a/src/ui/view/HomeView.ts +++ b/src/ui/view/HomeView.ts @@ -1,9 +1,12 @@ +import MarkdownIt from "markdown-it" +import Component from "ui/Component" import Block from "ui/component/core/Block" import Form from "ui/component/core/Form" import LabelledTable from "ui/component/core/LabelledTable" import TextEditor from "ui/component/core/TextEditor" import View from "ui/view/View" import ViewDefinition from "ui/view/ViewDefinition" +import MarkdownItHTML from "utility/string/MarkdownItHTML" export default ViewDefinition({ create: () => { @@ -12,6 +15,35 @@ export default ViewDefinition({ const block = Block().appendTo(view) const form = block.and(Form, block.title) + const output = Component("div") + Component("div") + .attributes.set("contenteditable", "plaintext-only") + .style.setProperty("white-space", "pre-wrap") + .style.setProperty("font", "inherit") + .style.setProperty("background", "#222") + .style.setProperty("width", "100%") + .style.setProperty("height", "400px") + .style.setProperty("padding", "0.5em") + .style.setProperty("box-sizing", "border-box") + .event.subscribe("input", event => { + const text = event.component.element.textContent ?? "" + const md = new MarkdownIt("commonmark", { html: true, breaks: true }) + MarkdownItHTML.use(md, MarkdownItHTML.Options() + .disallowTags("img", "figure", "figcaption", "map", "area")) + console.log(md.parse(text, {})) + output.element.innerHTML = md.render(text) + }) + .appendTo(form.content) + + output + .style.setProperty("font", "inherit") + .style.setProperty("background", "#222") + .style.setProperty("width", "100%") + .style.setProperty("padding", "0.5em") + .style.setProperty("margin-top", "1em") + .style.setProperty("box-sizing", "border-box") + .appendTo(form.content) + const table = LabelledTable().appendTo(form.content) table.label(label => label.text.set("test editor")) diff --git a/src/utility/Arrays.ts b/src/utility/Arrays.ts index f1b981d..4857c5f 100644 --- a/src/utility/Arrays.ts +++ b/src/utility/Arrays.ts @@ -66,6 +66,9 @@ declare global { findMap (predicate: (value: T, index: number, obj: T[]) => boolean, mapper: (value: T, index: number, obj: T[]) => RETURN): RETURN | undefined groupBy (grouper: (value: T, index: number, obj: T[]) => GROUP): [GROUP, T[]][] + + filterInPlace: Array["filter"] + mapInPlace: Array["filter"] } } @@ -298,6 +301,16 @@ namespace Arrays { return Object.entries(result) }) + + Define(Array.prototype, "filterInPlace", function (filter): any[] { + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + return this.splice(0, Infinity, ...this.filter(filter)) + }) + + Define(Array.prototype, "mapInPlace", function (mapper): any[] { + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + return this.splice(0, Infinity, ...this.map(mapper)) + }) } } diff --git a/src/utility/Time.ts b/src/utility/Time.ts index fcdd995..0a812d6 100644 --- a/src/utility/Time.ts +++ b/src/utility/Time.ts @@ -1,4 +1,4 @@ -import Strings from "utility/Strings" +import Strings from "utility/string/Strings" namespace Time { export type ISO = `${bigint}-${bigint}-${bigint}T${bigint}:${bigint}:${number}Z` diff --git a/src/utility/string/MarkdownItHTML.ts b/src/utility/string/MarkdownItHTML.ts new file mode 100644 index 0000000..858c358 --- /dev/null +++ b/src/utility/string/MarkdownItHTML.ts @@ -0,0 +1,954 @@ +import entities from "entities" +import type MarkdownIt from "markdown-it" +import type { PluginWithOptions, StateBlock, StateInline, Token as TokenBase } from "markdown-it" + +interface MarkdownItHTMLState { + block: StateBlock | undefined + inline: StateInline | undefined + i: number + l: number + e: number + src: string + silent: boolean + options: MarkdownItHTML.Options +} + +const html = Object.assign( + ((md, options) => { + const state: MarkdownItHTMLState = { + block: undefined as StateBlock | undefined, + inline: undefined as StateInline | undefined, + i: 0, + l: 0, + e: 0, + src: "", + silent: false, + options: { + ...html.defaultOptions, + ...options, + }, + } + + md.block.ruler.at("html_block", (block, startLine, endLine, silent) => { + state.block = block + state.src = state.block.src + state.l = startLine + state.i = state.block.bMarks[state.l] + state.block.tShift[state.l] + state.e = state.src.length + state.silent = silent + const result = html.consumeBlock(state) + state.block = undefined + return result + }, { alt: ["paragraph"] }) + + md.inline.ruler.at("html_inline", (inline, silent) => { + state.inline = inline + state.e = inline.posMax + state.i = inline.pos + state.src = inline.src + state.silent = silent + const result = html.consumeInline(state) + state.inline = undefined + return result + }) + + }) as PluginWithOptions, + { + + regexCSSProperty: /^[-a-zA-Z_][a-zA-Z0-9_-]*$/, + + defaultOptions: { + voidElements: [ + "area", + "base", + "br", + "col", + "embed", + "hr", + "img", + "input", + "link", + "meta", + "source", + "track", + "wbr", + ], + allowedTags: [ + // headings + "hgroup", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + // layout + "div", + "p", + "br", + "wbr", + "hr", + "details", + "summary", + "label", + // lists + "ol", + "ul", + "li", + // tables + "table", + "tr", + "th", + "td", + "caption", + "thead", + "tbody", + "tfoot", + // text + "span", + // text style + "i", + "b", + "u", + "s", + "strike", + "sup", + "sub", + "em", + "mark", + "small", + "strong", + // quoting/referencing + "q", + "cite", + "blockquote", + // links + "a", + // definitions + "abbr", + "dfn", + "dd", + "dt", + "dl", + // code + "code", + "samp", + "kbd", + // images + "img", + "figure", + "figcaption", + "area", + "map", + ], + allTagsAllowedAttributes: [ + "title", + "name", + "style", + "aria-label", + "aria-labelledby", + "aria-describedby", + "aria-hidden", + ], + allTagsAllowedAttributeValues: {}, + perTagAllowedAttributes: { + "a": ["href"], + "img": ["src", "alt", "usemap", "width", "height"], + "area": ["shape", "coords"], + "details": ["open"], + "ol": ["type", "start", "reversed"], + "li": ["value"], + "th": ["colspan", "rowspan", "headers", "scope", "abbr"], + "td": ["colspan", "rowspan", "headers"], + "q": ["cite"], + }, + perTagAllowedAttributeValues: { + "a": { "href": /^https?:/ }, + "img": { "src": /^https?:/ }, + "area": { "href": /^https?:/ }, + "q": { "cite": /^https?:/ }, + "blockquote": { "cite": /^https?:/ }, + }, + allTagsAllowedStyleProperties: [ + "color", + "text-align", + "font-family", + "font-style", + "font-weight", + "text-decoration", + "text-transform", + "line-height", + "letter-spacing", + "word-spacing", + "vertical-align", + "background-color", + "opacity", + "margin", + "padding", + "width", + "height", + "vertical-align", + "box-shadow", + "border-width", + "border-style", + "border-color", + "border-radius", + "text-indent", + "display", + "position", + ], + allTagsAllowedStylePropertyValues: { + "position": ["relative", "absolute", "sticky"], + }, + perTagAllowedStyleProperties: {}, + perTagAllowedStylePropertyValues: {}, + } satisfies MarkdownItHTML.Options as MarkdownItHTML.Options, + + Options (options?: MarkdownItHTML.Options): MarkdownItHTML.Options.Factory { + const factory: MarkdownItHTML.Options.Factory = Object.assign({ ...structuredClone(html.defaultOptions), ...options }, { + disallowTags (...tags) { + const disallowed = tags.map(tag => tag.toLowerCase()) + factory.allowedTags = factory.allowedTags.filter(tag => !disallowed.includes(tag)) + return factory + }, + allowTags (...tags) { + factory.allowedTags = [...new Set([...factory.allowedTags, ...tags.map(tag => tag.toLowerCase())])] + return factory + }, + disallowAttributes (...attributes) { + const disallowed = attributes.map(attr => attr.toLowerCase()) + factory.allTagsAllowedAttributes = factory.allTagsAllowedAttributes.filter(attr => !disallowed.includes(attr)) + for (const [tag, allowedAttributes] of Object.entries(factory.perTagAllowedAttributes)) + factory.perTagAllowedAttributes[tag] = allowedAttributes.filter(attr => !disallowed.includes(attr)) + return factory + }, + allowAttributes (...attributes) { + factory.allTagsAllowedAttributes = [...new Set([...factory.allTagsAllowedAttributes, ...attributes.map(attr => attr.toLowerCase())])] + return factory + }, + } satisfies Omit) + return factory + }, + + use (md: MarkdownIt, options?: MarkdownItHTML.Options.Factory) { + return md.use(html, options) + }, + + consumeBlock (state: MarkdownItHTMLState) { + if (!state.block) + return false + + html.consumeInlineWhitespace(state) + + if (state.silent) + return html.consumeTerminator(state) + + const result = html.consumeTagsLine(state) + if (!result) + return false + + state.l++ + state.block.line = state.l + + const indent = html.consumeInlineWhitespace(state) || 0 + if (indent >= state.block.blkIndent + 4) + state.block.blkIndent = indent - 4 // allow for indented code blocks within html block + else + state.block.blkIndent = indent + + return true + }, + + consumeInline (state: MarkdownItHTMLState) { + if (!state.inline || state.src[state.i] !== "<") + return false + + const tag = html.consumeTag(state) + if (!tag) + return false + + state.inline.pos = state.i + return true + }, + + consumeTerminator (state: MarkdownItHTMLState) { + const noSetBlockIndent = new Error().stack?.split("\n")?.at(4)?.includes("Array.lheading") + + const indent = html.consumeInlineWhitespace(state) || 0 + if (!html.consumeTagsLine(state)) + return false + + if (!noSetBlockIndent && state.block) + state.block.blkIndent = indent + + return true + }, + + consumeTagsLine (state: MarkdownItHTMLState): MarkdownItHTML.ConsumeResult | undefined { + let consumed = false + const tokens: TokenBase[] = [] + let token: TokenBase | true | undefined + while (token = html.consumeTag(state)) { + if (typeof token === "object") + tokens.push(token) + + consumed = true + html.consumeInlineWhitespace(state) + } + + if (!consumed) + return undefined + + if (state.i < state.src.length && !html.consumeNewline(state)) { + // a line of tags MUST end in a newline — if this doesn't, remove all the tokens we added and don't match + if (tokens.length) + state.block?.tokens.splice(0, Infinity, ...state.block.tokens + .filter(token => !tokens.includes(token))) + + return undefined + } + + return { + tokens, + } + }, + + consumeNewline (state: MarkdownItHTMLState) { + if (state.inline) + return false + + if (state.src[state.i] === "\n") { + state.i++ + return true + } + + if (state.src[state.i] !== "\r") + return false + + state.i++ + if (state.src[state.i] === "\n") + state.i++ + + return true + }, + + consumeWhitespace (state: MarkdownItHTMLState) { + if (state.inline) + return !!html.consumeInlineWhitespace(state) + + const start = state.i + if (state.i >= state.e) + return false + + for (state.i; state.i < state.e; state.i++) { + if (!html.isWhitespace(state)) + break + + if (html.consumeNewline(state)) { + state.l++ + state.i-- + } + } + + return state.i > start + }, + + consumeInlineWhitespace (state: MarkdownItHTMLState) { + if (state.i >= state.e) + return undefined + + let indent = 0 + for (state.i; state.i < state.e; state.i++) { + if (state.src[state.i] === " ") + indent++ + else if (state.src[state.i] === "\t") + indent += 4 + else + break + } + + return indent || undefined + }, + + consumeTag (state: MarkdownItHTMLState): TokenBase | true | undefined { + if (state.src[state.i] !== "<") + return undefined + + state.i++ + return html.consumeOpenTag(state) ?? html.consumeCloseTag(state) + }, + + consumeOpenTag (state: MarkdownItHTMLState): MarkdownItHTML.Token | true | undefined { + const start = state.i + + const tagNameRaw = html.consumeTagName(state) + if (!tagNameRaw) + return undefined + + const tagName = tagNameRaw.toLowerCase() + const o = state.options + if (!o.allowedTags.includes(tagNameRaw)) { + state.i = start + return undefined + } + + const attributes: MarkdownItHTML.AttributeTuple[] = [] + let style: Map | undefined + while (html.consumeWhitespace(state)) { + const attribute = html.consumeAttribute(state) + if (!attribute) + break + + let [name, value] = attribute + name = name.toLowerCase() + if (!o.allTagsAllowedAttributes.includes(name) && !o.perTagAllowedAttributes[tagName]?.includes(name)) + continue + + value = entities.decodeHTML5Strict(value) + if (name !== "style") { + const allowedValues = o.perTagAllowedAttributeValues[tagName]?.[name] ?? o.allTagsAllowedAttributeValues[name] + if (allowedValues !== undefined && !html.matchesAllowedValues(value, allowedValues)) + continue + + attributes.push(attribute) + continue + } + + style = html.parseStyleAttributeValue(value) + let styleValue = "" + for (let [property, value] of style) { + property = property.toLowerCase() + if (!o.allTagsAllowedStyleProperties.includes(property) && !o.perTagAllowedStyleProperties[tagName]?.includes(property)) + continue + + const importantToken = "!important" + const important = value.slice(-importantToken.length).toLowerCase() === importantToken + if (important) + value = value.slice(0, -importantToken.length).trim() + + const allowedValues = o.perTagAllowedStylePropertyValues[tagName]?.[property] ?? o.allTagsAllowedStylePropertyValues[property] + if (allowedValues !== undefined && !html.matchesAllowedValues(value, allowedValues)) + continue + + styleValue += `${property}:${value}${important ? importantToken : ""};` + } + + if (styleValue.length) + attributes.push(["style", styleValue.slice(0, -1)]) + } + + if (state.src[state.i] === "/") + state.i++ + + if (state.src[state.i] !== ">") { + state.i = start + return undefined + } + + state.i++ + const nesting = state.options.voidElements.includes(tagName) ? 0 : 1 + if (state.silent) + return true + + let type = `html_${state.block ? "block" : "inline"}${nesting ? "_open" : ""}` + if (tagName === "br") + type = "softbreak" + + const mdState = state.block ?? state.inline! + const token = mdState.push(type, tagName, nesting) + Object.assign(token, { + style, + raw: state.src.slice(start - 1, state.i), + }) + + for (const attribute of attributes) + token.attrPush(attribute) + + return token + }, + + consumeCloseTag (state: MarkdownItHTMLState): TokenBase | true | undefined { + const start = state.i + if (state.src[state.i] !== "/") + return undefined + + state.i++ + const tagNameRaw = html.consumeTagName(state) + if (!tagNameRaw) + return undefined + + if (state.src[state.i] !== ">") { + state.i = start + return undefined + } + + state.i++ + const tagName = tagNameRaw.toLowerCase() + if (!state.options.allowedTags.includes(tagName)) { + state.i = start + return undefined + } + + if (state.silent || state.options.voidElements.includes(tagName)) + return true + + const type = `html_${state.block ? "block" : "inline"}_close` + const mdState = state.block ?? state.inline! + + const token = mdState.push(type, tagName, -1) + Object.assign(token, { raw: state.src.slice(start - 1, state.i) }) + + if (state.inline && !state.inline.delimiters) + state.inline.delimiters = [] + + return token + }, + + consumeTagName (state: MarkdownItHTMLState) { + const start = state.i + if (state.i >= state.e) + return undefined + + if (!html.isAlpha(state)) + return undefined + + for (state.i++; state.i < state.e; state.i++) + if (!html.isAlphaNumeric(state)) + break + + return state.src.slice(start, state.i) + }, + + consumeAttribute (state: MarkdownItHTMLState): MarkdownItHTML.AttributeTuple | undefined { + const start = state.i + const name = html.consumeAttributeName(state) + if (!name) + return undefined + + const valueStart = state.i + html.consumeWhitespace(state) + if (state.src[state.i] !== "=") { + state.i = valueStart + return [name, ""] + } + + state.i++ + html.consumeWhitespace(state) + const value = html.consumeAttributeValue(state) + if (!value) { + state.i = start + return undefined + } + + return [name, value] + }, + + consumeAttributeName (state: MarkdownItHTMLState) { + const start = state.i + if (state.i >= state.e) + return undefined + + for (state.i; state.i < state.e; state.i++) { + const charCode = state.src.charCodeAt(state.i) + + const isInvalidChar = false + || charCode === 0x0020 // SPACE + || charCode === 0x0022 // " + || charCode === 0x0027 // ' + || charCode === 0x003E // > + || charCode === 0x002F // / + || charCode === 0x003D // = + || html.isNonCharacter(state, charCode) + || html.isControl(state, charCode) + if (isInvalidChar) + break + } + + return state.i > start ? state.src.slice(start, state.i) : undefined + }, + + consumeAttributeValue (state: MarkdownItHTMLState) { + return false + || html.consumeUnquotedAttributeValue(state) + || html.consumeQuotedAttributeValue(state) + || undefined + }, + + consumeUnquotedAttributeValue (state: MarkdownItHTMLState) { + let result = "" + + while (state.i < state.e) { + const charCode = state.src.charCodeAt(state.i) + + // Check for invalid characters in unquoted attribute values + const isInvalidChar = false + || charCode === 0x0022 // " + || charCode === 0x0027 // ' + || charCode === 0x003D // = + || charCode === 0x003C // < + || charCode === 0x003E // > + || charCode === 0x0060 // ` + || html.isWhitespace(state, charCode) // ASCII whitespace + if (isInvalidChar) + break + + if (charCode !== 0x0026) { // not & + result += state.src[state.i] + state.i++ + continue + } + + const charRef = html.consumeCharacterReference(state) + if (!charRef) { + result += "&" + state.i++ + continue + } + + result += charRef + // `i` is already at the next pos + } + + return result || undefined + }, + + consumeQuotedAttributeValue (state: MarkdownItHTMLState) { + const start = state.i + + const quoteChar = state.src[state.i] + if (quoteChar !== "'" && quoteChar !== '"') + return undefined + + state.i++ + let result = "" + + while (state.i < state.e) { + const charCode = state.src.charCodeAt(state.i) + + if (state.src[state.i] === quoteChar) { + state.i++ + return result + } + + if (charCode !== 0x0026) { // not & + const charStart = state.i + if (html.consumeNewline(state)) { + state.l++ + result += state.src.slice(charStart, state.i) + continue + } + + const isNewlineInInlineMode = state.inline && html.isWhitespace(state) && state.src[state.i] !== " " && state.src[state.i] !== "\t" + if (isNewlineInInlineMode) { + state.i = start + return undefined + } + + result += state.src[state.i] + state.i++ + continue + } + + const charRef = html.consumeCharacterReference(state) + if (!charRef) { + result += "&" + state.i++ + continue + } + + result += charRef + // `i` is already at the next pos + } + + // no closing quote before the end of `src` + state.i = start + return undefined + }, + + consumeCharacterReference (state: MarkdownItHTMLState) { + const start = state.i + if (state.src[state.i] !== "&") + return undefined + + state.i++ + + const isValid = html.consumeNumericCharacterReference(state) || html.consumeNamedCharacterReference(state) + if (!isValid) { + state.i = start + return undefined + } + + return state.src.slice(start, state.i) + }, + + consumeNamedCharacterReference (state: MarkdownItHTMLState) { + const nameStart = state.i + for (state.i; state.i < state.e; state.i++) + if (!html.isAlpha(state)) + break + + if (state.i === nameStart || state.src[state.i] !== ";") + return false + + state.i++ + return true + }, + + consumeNumericCharacterReference (state: MarkdownItHTMLState) { + if (state.src[state.i] !== "#") + return false + + state.i++ + + const isHex = state.src[state.i] === "x" || state.src[state.i] === "X" + if (isHex) + state.i++ + + const digitsStart = state.i + for (state.i; state.i < state.e; state.i++) + if (isHex ? !html.isHexadecimal(state) : !html.isNumeric(state)) + break + + if (state.i === digitsStart || state.src[state.i] !== ";") + return false + + const codePoint = parseInt(state.src.slice(digitsStart, state.i), isHex ? 16 : 10) + if (codePoint === 0x000D || html.isNonCharacter(state, codePoint) || (html.isControl(state, codePoint) && !html.isWhitespace(state, codePoint))) + return false + + state.i++ + return true + }, + + parseStyleAttributeValue: ((style?: string | null) => { + if (style === undefined || style === null) + return undefined + + const styles = new Map() + let key = "" + let value = "" + let inValue = false + let isEscaped = false + let isQuoted = false + let isComment = false + let quoteChar = "" + let parenCount = 0 + + for (let i = 0; i < style.length; i++) { + const char = style[i] + if (isComment) { + if (char !== "*" && style[i + 1] !== "/") + continue + + isComment = false + i++ + continue + } + + if (char === "\\") { + isEscaped = true + continue + } + + if (isEscaped) { + value += char + isEscaped = false + continue + } + + if (!isComment && char === "/" && style[i + 1] === "*") { + isComment = true + i++ + continue + } + + if (isQuoted) { + if (char === quoteChar) { + isQuoted = false + value += char + continue + } + + } else { + if (char === '"' || char === "'") { + isQuoted = true + quoteChar = char + value += char + continue + } + } + + if (char === "(" && !isQuoted) { + parenCount++ + value += char + continue + } + + if (char === ")" && !isQuoted) { + parenCount-- + value += char + continue + } + + if (char === ":" && !isQuoted && parenCount === 0) { + inValue = true + continue + } + + if (char === ";" && !isQuoted && parenCount === 0) { + if (key && value) { + key = key.trim() + if (!html.regexCSSProperty.test(key)) + console.warn(`Invalid CSS property "${key}"`) + else + styles.set(key, value.trim()) + key = "" + value = "" + } + inValue = false + continue + } + + if (inValue) { + value += char + } else { + key += char + } + } + + if (key && value) { + key = key.trim() + if (!html.regexCSSProperty.test(key)) + console.warn(`Invalid CSS property "${key}"`) + else + styles.set(key, value.trim()) + } + + return styles + }) as { + (style: string): Map + (style?: string | null): Map | undefined + }, + + isAlpha (state: MarkdownItHTMLState, charCode = state.src.charCodeAt(state.i)) { + return (charCode >= 65 && charCode <= 90) || (charCode >= 97 && charCode <= 122) // A-Z, a-z + }, + + isNumeric (state: MarkdownItHTMLState, charCode = state.src.charCodeAt(state.i)) { + return charCode >= 48 && charCode <= 57 + }, + + isHexadecimal (state: MarkdownItHTMLState, charCode = state.src.charCodeAt(state.i)) { + return (charCode >= 65 && charCode <= 70) || (charCode >= 97 && charCode <= 102) || html.isNumeric(state, charCode) + }, + + isAlphaNumeric (state: MarkdownItHTMLState, charCode = state.src.charCodeAt(state.i)) { + return html.isAlpha(state, charCode) || html.isNumeric(state, charCode) + }, + + isNonCharacter (state: MarkdownItHTMLState, charCode = state.src.charCodeAt(state.i)) { + return false + || (charCode >= 0xFDD0 && charCode <= 0xFDEF) + || charCode === 0xFFFE || charCode === 0xFFFF + || charCode === 0x1FFFE || charCode === 0x1FFFF + || charCode === 0x2FFFE || charCode === 0x2FFFF + || charCode === 0x3FFFE || charCode === 0x3FFFF + || charCode === 0x4FFFE || charCode === 0x4FFFF + || charCode === 0x5FFFE || charCode === 0x5FFFF + || charCode === 0x6FFFE || charCode === 0x6FFFF + || charCode === 0x7FFFE || charCode === 0x7FFFF + || charCode === 0x8FFFE || charCode === 0x8FFFF + || charCode === 0x9FFFE || charCode === 0x9FFFF + || charCode === 0xAFFFE || charCode === 0xAFFFF + || charCode === 0xBFFFE || charCode === 0xBFFFF + || charCode === 0xCFFFE || charCode === 0xCFFFF + || charCode === 0xDFFFE || charCode === 0xDFFFF + || charCode === 0xEFFFE || charCode === 0xEFFFF + || charCode === 0xFFFFE || charCode === 0xFFFFF + || charCode === 0x10FFFE || charCode === 0x10FFFF + }, + + isControl (state: MarkdownItHTMLState, charCode = state.src.charCodeAt(state.i)) { + return false + || (charCode >= 0x0000 && charCode <= 0x001F) + || (charCode >= 0x007F && charCode <= 0x009F) + }, + + isWhitespace (state: MarkdownItHTMLState, charCode = state.src.charCodeAt(state.i)) { + return false + || charCode === 0x0009 // TAB + || charCode === 0x000A // LF + || charCode === 0x000C // FF + || charCode === 0x000D // CR + || charCode === 0x0020 // SPACE + }, + + matchesAllowedValues (value: string, allowed: MarkdownItHTML.AllowedValues): boolean { + if (Array.isArray(allowed)) + return allowed.some(allowed => html.matchesAllowedValues(value, allowed)) + + if (typeof allowed === "string") + return value === allowed + + if (typeof allowed === "function") + return allowed(value) + + return allowed.test(value) + }, + }, +) + +const MarkdownItHTML = html +type MarkdownItHTML = typeof MarkdownItHTML + +namespace MarkdownItHTML { + + type AllowedValue = string | RegExp | ((value: string) => boolean) + export type AllowedValues = AllowedValue | AllowedValue[] + + export interface Options { + /** The tagnames that will be parsed as HTML */ + allowedTags: string[] + /** Attributes that are allowed in all tags */ + allTagsAllowedAttributes: string[] + /** Allowed attribute values for all tags */ + allTagsAllowedAttributeValues: Record + /** Additional attributes allowed in specific tags */ + perTagAllowedAttributes: Record + /** Override allowed attribute values for specific tags */ + perTagAllowedAttributeValues: Record> + /** Allowed style properties on all tags */ + allTagsAllowedStyleProperties: string[] + /** Allowed values for style properties on all tags */ + allTagsAllowedStylePropertyValues: Record + /** Additional allowed style properties for specific tags */ + perTagAllowedStyleProperties: Record + /** Override allowed style property values for specific tags */ + perTagAllowedStylePropertyValues: Record> + /** The current list of void elements in the HTML standard, or extras if you're doing something custom */ + voidElements: string[] + } + + export namespace Options { + export interface Factory extends Options { + allowTags (...tags: string[]): this + disallowTags (...tags: string[]): this + allowAttributes (...tags: string[]): this + disallowAttributes (...tags: string[]): this + } + } + + export type AttributeTuple = [name: string, value: string] + + + export interface Token extends TokenBase { + readonly raw?: string + readonly style?: ReadonlyMap + } + + export interface ConsumeResult { + tokens: Token[] + } +} + +export default MarkdownItHTML diff --git a/src/utility/Strings.ts b/src/utility/string/Strings.ts similarity index 100% rename from src/utility/Strings.ts rename to src/utility/string/Strings.ts