From d4b0401a12fcc2c04272472bdfc70e56b473a2b2 Mon Sep 17 00:00:00 2001 From: Sam Rawlins Date: Tue, 16 Jul 2019 16:16:01 -0700 Subject: [PATCH] Fix some html entities (#256) Fix the HTML-ification of ", <, > --- CHANGELOG.md | 2 ++ lib/src/inline_parser.dart | 16 ++++++++++++++-- test/common_mark/backslash_escapes.unit | 4 ++-- test/common_mark/link_reference_definitions.unit | 2 +- test/common_mark/links.unit | 2 +- test/common_mark/setext_headings.unit | 2 +- test/gfm/backslash_escapes.unit | 4 ++-- test/gfm/link_reference_definitions.unit | 2 +- test/gfm/links.unit | 2 +- test/gfm/setext_headings.unit | 2 +- test/original/backslash_escapes.unit | 4 ++-- tool/common_mark_stats.json | 6 +++--- tool/common_mark_stats.txt | 6 +++--- tool/gfm_stats.json | 6 +++--- tool/gfm_stats.txt | 6 +++--- 15 files changed, 40 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 049c2465..7962de84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ * Improve strict spec compliance for `blockquote` by always putting the closing tag on a new line. * Improve strict spec compliance for `code` elements defined with "\`". +* Properly encode `<`, `>`, and `"` as their respective HTML entities when + interpreted as text. ## 2.0.3 diff --git a/lib/src/inline_parser.dart b/lib/src/inline_parser.dart index a1fede50..10b56f2c 100644 --- a/lib/src/inline_parser.dart +++ b/lib/src/inline_parser.dart @@ -234,8 +234,20 @@ class EscapeSyntax extends InlineSyntax { EscapeSyntax() : super(r'''\\[!"#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]'''); bool onMatch(InlineParser parser, Match match) { - // Insert the substitution. - parser.addNode(Text(match[0][1])); + final char = match[0].codeUnitAt(1); + // Insert the substitution. Why these three characters are replaced with + // their equivalent HTML entity references appears to be missing from the + // CommonMark spec, but is very present in all of the examples. 
+ // https://talk.commonmark.org/t/entity-ification-of-quotes-and-brackets-missing-from-spec/3207 + if (char == $double_quote) { + parser.addNode(Text('&quot;')); + } else if (char == $lt) { + parser.addNode(Text('&lt;')); + } else if (char == $gt) { + parser.addNode(Text('&gt;')); + } else { + parser.addNode(Text(match[0][1])); + } return true; } } diff --git a/test/common_mark/backslash_escapes.unit b/test/common_mark/backslash_escapes.unit index f16ddfda..f906dc38 100644 --- a/test/common_mark/backslash_escapes.unit +++ b/test/common_mark/backslash_escapes.unit @@ -1,7 +1,7 @@ >>> Backslash escapes - 298 \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ <<< -

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

+

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

>>> Backslash escapes - 299 \ \A\a\ \3\φ\« <<< @@ -18,7 +18,7 @@ \ö not a character entity <<<

*not emphasized* -
not a tag +<br/> not a tag [not a link](/foo) `not code` 1. not a list diff --git a/test/common_mark/link_reference_definitions.unit b/test/common_mark/link_reference_definitions.unit index 388ed8d0..16f86fb0 100644 --- a/test/common_mark/link_reference_definitions.unit +++ b/test/common_mark/link_reference_definitions.unit @@ -84,7 +84,7 @@ with blank line' [foo] <<< -

[foo]: /url\bar*baz "foo"bar\baz"

+

[foo]: /url\bar*baz "foo"bar\baz"

[foo]

>>> Link reference definitions - 172 [foo] diff --git a/test/common_mark/links.unit b/test/common_mark/links.unit index 7f9707fd..fb9cf5f4 100644 --- a/test/common_mark/links.unit +++ b/test/common_mark/links.unit @@ -41,7 +41,7 @@ bar>)

>>> Links - 490 [link]() <<< -

[link](<foo>)

+

[link](<foo>)

>>> Links - 491 [a]( diff --git a/test/common_mark/setext_headings.unit b/test/common_mark/setext_headings.unit index 563018d9..e0e45722 100644 --- a/test/common_mark/setext_headings.unit +++ b/test/common_mark/setext_headings.unit @@ -183,7 +183,7 @@ Baz \> foo ------ <<< -

> foo

+

> foo

>>> Setext headings - 73 Foo diff --git a/test/gfm/backslash_escapes.unit b/test/gfm/backslash_escapes.unit index ca03f364..e0f5dfb2 100644 --- a/test/gfm/backslash_escapes.unit +++ b/test/gfm/backslash_escapes.unit @@ -1,7 +1,7 @@ >>> Backslash escapes - 308 \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ <<< -

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

+

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

>>> Backslash escapes - 309 \ \A\a\ \3\φ\« <<< @@ -18,7 +18,7 @@ \ö not a character entity <<<

*not emphasized* -
not a tag +<br/> not a tag [not a link](/foo) `not code` 1. not a list diff --git a/test/gfm/link_reference_definitions.unit b/test/gfm/link_reference_definitions.unit index 388ed8d0..16f86fb0 100644 --- a/test/gfm/link_reference_definitions.unit +++ b/test/gfm/link_reference_definitions.unit @@ -84,7 +84,7 @@ with blank line' [foo] <<< -

[foo]: /url\bar*baz "foo"bar\baz"

+

[foo]: /url\bar*baz "foo"bar\baz"

[foo]

>>> Link reference definitions - 172 [foo] diff --git a/test/gfm/links.unit b/test/gfm/links.unit index a64166a3..698282d5 100644 --- a/test/gfm/links.unit +++ b/test/gfm/links.unit @@ -41,7 +41,7 @@ bar>)

>>> Links - 502 [link]() <<< -

[link](<foo>)

+

[link](<foo>)

>>> Links - 503 [a]( diff --git a/test/gfm/setext_headings.unit b/test/gfm/setext_headings.unit index 563018d9..e0e45722 100644 --- a/test/gfm/setext_headings.unit +++ b/test/gfm/setext_headings.unit @@ -183,7 +183,7 @@ Baz \> foo ------ <<< -

> foo

+

> foo

>>> Setext headings - 73 Foo diff --git a/test/original/backslash_escapes.unit b/test/original/backslash_escapes.unit index 8828c497..05a8c999 100644 --- a/test/original/backslash_escapes.unit +++ b/test/original/backslash_escapes.unit @@ -5,8 +5,8 @@ and \? and \@ and \[ and \\ and \] and \^ and \_ and \` and \{ and \| and \} and \~. <<< -

Punctuations like ! and " and # and $ and % and & and ' and ( and ) -and * and + and , and - and . and / and : and ; and < and = and > +

Punctuations like ! and " and # and $ and % and & and ' and ( and ) +and * and + and , and - and . and / and : and ; and < and = and > and ? and @ and [ and \ and ] and ^ and _ and ` and { and | and } and ~.

>>> Inline code blocks can be escaped. diff --git a/tool/common_mark_stats.json b/tool/common_mark_stats.json index 5318f46f..a250f903 100644 --- a/tool/common_mark_stats.json +++ b/tool/common_mark_stats.json @@ -43,7 +43,7 @@ "Backslash escapes": { "298": "loose", "299": "strict", - "300": "fail", + "300": "loose", "301": "strict", "302": "strict", "303": "strict", @@ -435,7 +435,7 @@ "487": "strict", "488": "strict", "489": "strict", - "490": "loose", + "490": "strict", "491": "strict", "492": "strict", "493": "strict", @@ -651,7 +651,7 @@ "69": "strict", "70": "strict", "71": "strict", - "72": "loose", + "72": "strict", "73": "strict", "74": "strict", "75": "strict", diff --git a/tool/common_mark_stats.txt b/tool/common_mark_stats.txt index 86ccf5d0..249ed100 100644 --- a/tool/common_mark_stats.txt +++ b/tool/common_mark_stats.txt @@ -1,6 +1,6 @@ 17 of 18 – 94.4% ATX headings 19 of 19 – 100.0% Autolinks - 11 of 13 – 84.6% Backslash escapes + 12 of 13 – 92.3% Backslash escapes 1 of 1 – 100.0% Blank lines 22 of 25 – 88.0% Block quotes 21 of 22 – 95.5% Code spans @@ -24,5 +24,5 @@ 11 of 11 – 100.0% Tabs 3 of 3 – 100.0% Textual content 19 of 19 – 100.0% Thematic breaks - 597 of 649 – 92.0% TOTAL - 523 of 597 – 87.6% TOTAL Strict + 598 of 649 – 92.1% TOTAL + 525 of 598 – 87.8% TOTAL Strict diff --git a/tool/gfm_stats.json b/tool/gfm_stats.json index a1ac55fb..243cd6cd 100644 --- a/tool/gfm_stats.json +++ b/tool/gfm_stats.json @@ -56,7 +56,7 @@ "Backslash escapes": { "308": "loose", "309": "strict", - "310": "fail", + "310": "loose", "311": "strict", "312": "strict", "313": "strict", @@ -451,7 +451,7 @@ "499": "strict", "500": "strict", "501": "strict", - "502": "loose", + "502": "strict", "503": "strict", "504": "strict", "505": "strict", @@ -667,7 +667,7 @@ "69": "strict", "70": "strict", "71": "strict", - "72": "loose", + "72": "strict", "73": "strict", "74": "strict", "75": "strict", diff --git a/tool/gfm_stats.txt b/tool/gfm_stats.txt index 3c7f6ba1..ab2fb170 
100644 --- a/tool/gfm_stats.txt +++ b/tool/gfm_stats.txt @@ -1,7 +1,7 @@ 17 of 18 – 94.4% ATX headings 17 of 19 – 89.5% Autolinks 8 of 11 – 72.7% Autolinks (extension) - 11 of 13 – 84.6% Backslash escapes + 12 of 13 – 92.3% Backslash escapes 1 of 1 – 100.0% Blank lines 22 of 25 – 88.0% Block quotes 21 of 22 – 95.5% Code spans @@ -28,5 +28,5 @@ 11 of 11 – 100.0% Tabs 3 of 3 – 100.0% Textual content 19 of 19 – 100.0% Thematic breaks - 612 of 671 – 91.2% TOTAL - 529 of 612 – 86.4% TOTAL Strict + 613 of 671 – 91.4% TOTAL + 531 of 613 – 86.6% TOTAL Strict