From 6d827c85c876e3bf9c55887bac4cbe9aeb9e2bdf Mon Sep 17 00:00:00 2001 From: jinbowen Date: Tue, 26 Nov 2024 14:08:55 +0800 Subject: [PATCH] fix: update punctuation regex syntax to fix babel mistaken transpile https://github.com/markedjs/marked/pull/3540#issuecomment-2498465917 --- src/rules.ts | 74 ++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/src/rules.ts b/src/rules.ts index 2181cf510b9..be69f2e7631 100644 --- a/src/rules.ts +++ b/src/rules.ts @@ -114,15 +114,15 @@ const _tag = 'address|article|aside|base|basefont|blockquote|body|caption' const _comment = /|$))/; const html = edit( '^ {0,3}(?:' // optional indentation -+ '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1) -+ '|comment[^\\n]*(\\n+|$)' // (2) -+ '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) -+ '|\\n*|$)' // (4) -+ '|\\n*|$)' // (5) -+ '|)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (6) -+ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) open tag -+ '|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) closing tag -+ ')', 'i') + + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1) + + '|comment[^\\n]*(\\n+|$)' // (2) + + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) + + '|\\n*|$)' // (4) + + '|\\n*|$)' // (5) + + '|)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (6) + + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) open tag + + '|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) closing tag + + ')', 'i') .replace('comment', _comment) .replace('tag', _tag) .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) @@ -172,8 +172,8 @@ type BlockKeys = keyof typeof blockNormal; const gfmTable = edit( '^ *([^\\n ].*)\\n' // Header -+ ' {0,3}((?:\\| *)?:?-+:? *(?:\\| *:?-+:? *)*(?:\\| *)?)' // Align -+ '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)') // Cells + + ' {0,3}((?:\\| *)?:?-+:? *(?:\\| *:?-+:? *)*(?:\\| *)?)' // Align + + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)') // Cells .replace('hr', hr) .replace('heading', ' {0,3}#{1,6}(?:\\s|$)') .replace('blockquote', ' {0,3}>') @@ -243,42 +243,48 @@ const br = /^( {2,}|\\)\n(?!\s*$)/; const inlineText = /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\ const blockSkip = /\[[^[\]]*?\]\((?:\\.|[^\\\(\)]|\((?:\\.|[^\\\(\)])*\))*\)|`[^`]*?`|<[^<>]*?>/g; -const emStrongLDelim = edit(/^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/, 'u') +const emStrongLDelim = edit(/^(?:\*+(?:((?!\*)punct)|[^\s*]))|^_+(?:((?!_)punct)|([^\s_]))/, 'u') .replace(/punct/g, _punctuation) .getRegex(); const emStrongRDelimAst = edit( '^[^_*]*?__[^_*]*?\\*[^_*]*?(?=__)' // Skip orphan inside strong -+ '|[^*]+(?=[^*])' // Consume to delim -+ '|(?!\\*)[punct](\\*+)(?=[\\s]|$)' // (1) #*** can only be a Right Delimiter -+ '|[^punct\\s](\\*+)(?!\\*)(?=[punct\\s]|$)' // (2) a***#, a*** can only be a Right Delimiter -+ '|(?!\\*)[punct\\s](\\*+)(?=[^punct\\s])' // (3) #***a, ***a can only be Left Delimiter -+ '|[\\s](\\*+)(?!\\*)(?=[punct])' // (4) ***# can only be Left Delimiter -+ '|(?!\\*)[punct](\\*+)(?!\\*)(?=[punct])' // (5) #***# can be either Left or Right Delimiter -+ '|[^punct\\s](\\*+)(?=[^punct\\s])', 'gu') // (6) a***a can be either Left or Right Delimiter + + '|[^*]+(?=[^*])' // Consume to delim + + '|(?!\\*)punct(\\*+)(?=[\\s]|$)' // (1) #*** can only be a Right Delimiter + + '|notPunctSpace(\\*+)(?!\\*)(?=punctSpace|$)' // (2) a***#, a*** can only be a Right Delimiter + + '|(?!\\*)punctSpace(\\*+)(?=notPunctSpace)' // (3) #***a, ***a can only be Left Delimiter + + '|[\\s](\\*+)(?!\\*)(?=punct)' // (4) ***# can only be Left Delimiter + + '|(?!\\*)punct(\\*+)(?!\\*)(?=punct)' // (5) #***# can be either Left or Right Delimiter + + '|notPunctSpace(\\*+)(?=notPunctSpace)', 'gu') // (6) a***a can be either Left or Right Delimiter + .replace(/notPunctSpace/g, _notPunctuationOrSpace) + .replace(/punctSpace/g, _punctuationOrSpace) .replace(/punct/g, _punctuation) .getRegex(); // (6) Not allowed for _ const emStrongRDelimUnd = edit( '^[^_*]*?\\*\\*[^_*]*?_[^_*]*?(?=\\*\\*)' // Skip orphan inside strong -+ '|[^_]+(?=[^_])' // Consume to delim -+ '|(?!_)[punct](_+)(?=[\\s]|$)' // (1) #___ can only be a Right Delimiter -+ '|[^punct\\s](_+)(?!_)(?=[punct\\s]|$)' // (2) a___#, a___ can only be a Right Delimiter -+ '|(?!_)[punct\\s](_+)(?=[^punct\\s])' // (3) #___a, ___a can only be Left Delimiter -+ '|[\\s](_+)(?!_)(?=[punct])' // (4) ___# can only be Left Delimiter -+ '|(?!_)[punct](_+)(?!_)(?=[punct])', 'gu') // (5) #___# can be either Left or Right Delimiter + + '|[^_]+(?=[^_])' // Consume to delim + + '|(?!_)punct(_+)(?=[\\s]|$)' // (1) #___ can only be a Right Delimiter + + '|notPunctSpace(_+)(?!_)(?=punctSpace|$)' // (2) a___#, a___ can only be a Right Delimiter + + '|(?!_)punctSpace(_+)(?=notPunctSpace)' // (3) #___a, ___a can only be Left Delimiter + + '|[\\s](_+)(?!_)(?=punct)' // (4) ___# can only be Left Delimiter + + '|(?!_)punct(_+)(?!_)(?=punct)', 'gu') // (5) #___# can be either Left or Right Delimiter + .replace(/notPunctSpace/g, _notPunctuationOrSpace) + .replace(/punctSpace/g, _punctuationOrSpace) .replace(/punct/g, _punctuation) .getRegex(); -const anyPunctuation = edit(/\\([punct])/, 'gu') +const anyPunctuation = edit(/\\(punct)/, 'gu') .replace(/punct/g, _punctuation) .getRegex(); @@ -290,11 +296,11 @@ const autolink = edit(/^<(scheme:[^\s\x00-\x1f<>]*|email)>/) const _inlineComment = edit(_comment).replace('(?:-->|$)', '-->').getRegex(); const tag = edit( '^comment' - + '|^' // self-closing tag - + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag - + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. - + '|^' // declaration, e.g. - + '|^') // CDATA section + + '|^' // self-closing tag + + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag + + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. + + '|^' // declaration, e.g. + + '|^') // CDATA section .replace('comment', _inlineComment) .replace('attribute', /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/) .getRegex();