From f07422b17f1e05737093db0a91649b5ee7417eeb Mon Sep 17 00:00:00 2001 From: Sam Rawlins Date: Wed, 28 Aug 2019 15:24:38 -0700 Subject: [PATCH 1/2] Fix parsing of inline code blocks with multiple backticks --- CHANGELOG.md | 1 + lib/src/block_parser.dart | 26 ++++++++++++++++++++---- test/common_mark/code_spans.unit | 3 +-- test/common_mark/fenced_code_blocks.unit | 10 ++++----- test/gfm/code_spans.unit | 3 +-- test/gfm/fenced_code_blocks.unit | 10 ++++----- tool/common_mark_stats.json | 6 +++--- tool/common_mark_stats.txt | 8 ++++---- tool/gfm_stats.json | 6 +++--- tool/gfm_stats.txt | 8 ++++---- 10 files changed, 47 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7962de84..5b3dc7b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Improve strict spec compliance for `code` elements defined with "\`". * Properly encode `<`, `>`, and `"` as their respective HTML entities when interpreted as text. +* Improve inline code parsing when using multiple backticks. ## 2.0.3 diff --git a/lib/src/block_parser.dart b/lib/src/block_parser.dart index 81530043..b69b7f64 100644 --- a/lib/src/block_parser.dart +++ b/lib/src/block_parser.dart @@ -2,6 +2,8 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. +import 'package:charcode/charcode.dart'; + import 'ast.dart'; import 'document.dart'; import 'util.dart'; @@ -25,7 +27,7 @@ final _blockquotePattern = RegExp(r'^[ ]{0,3}>[ ]?(.*)$'); final _indentPattern = RegExp(r'^(?: | {0,3}\t)(.*)$'); /// Fenced code block. -final _codePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$'); +final _codeFencePattern = RegExp(r'^[ ]{0,3}(`{3,}|~{3,})(.*)$'); /// Three or more hyphens, asterisks or underscores by themselves. Note that /// a line like `----` is valid as both HR and SETEXT. In case of a tie, @@ -265,7 +267,7 @@ class SetextHeaderSyntax extends BlockSyntax { bool _interperableAsParagraph(String line) => !(_indentPattern.hasMatch(line) || - _codePattern.hasMatch(line) || + _codeFencePattern.hasMatch(line) || _headerPattern.hasMatch(line) || _blockquotePattern.hasMatch(line) || _hrPattern.hasMatch(line) || @@ -404,12 +406,28 @@ class CodeBlockSyntax extends BlockSyntax { /// Parses preformatted code blocks between two ~~~ or ``` sequences. /// -/// See [Pandoc's documentation](http://pandoc.org/README.html#fenced-code-blocks). +/// See the CommonMark spec: https://spec.commonmark.org/0.29/#fenced-code-blocks class FencedCodeBlockSyntax extends BlockSyntax { - RegExp get pattern => _codePattern; + RegExp get pattern => _codeFencePattern; const FencedCodeBlockSyntax(); + bool canParse(BlockParser parser) { + var match = pattern.firstMatch(parser.current); + if (match == null) return false; + final codeFence = match.group(1); + final infoString = match.group(2); + // From the CommonMark spec: + // + // > If the info string comes after a backtick fence, it may not contain + // > any backtick characters. + if (codeFence.codeUnitAt(0) == $backquote && + infoString.codeUnits.contains($backquote)) { + return false; + } + return true; + } + List parseChildLines(BlockParser parser, [String endBlock]) { if (endBlock == null) endBlock = ''; diff --git a/test/common_mark/code_spans.unit b/test/common_mark/code_spans.unit index 069bd88a..6864bf7e 100644 --- a/test/common_mark/code_spans.unit +++ b/test/common_mark/code_spans.unit @@ -86,8 +86,7 @@ baz` >>> Code spans - 347 ```foo`` <<< -

-
+

```foo``

>>> Code spans - 348 `foo <<< diff --git a/test/common_mark/fenced_code_blocks.unit b/test/common_mark/fenced_code_blocks.unit index 2e305246..11ab448e 100644 --- a/test/common_mark/fenced_code_blocks.unit +++ b/test/common_mark/fenced_code_blocks.unit @@ -166,9 +166,8 @@ aaa ``` ``` aaa <<< -
aaa
-
-
+

+aaa

>>> Fenced code blocks - 109 ~~~~~~ aaa @@ -232,9 +231,8 @@ end ``` aa ``` foo <<< -
foo
-
-
+

aa +foo

>>> Fenced code blocks - 116 ~~~ aa ``` ~~~ foo diff --git a/test/gfm/code_spans.unit b/test/gfm/code_spans.unit index 6789f574..f6203719 100644 --- a/test/gfm/code_spans.unit +++ b/test/gfm/code_spans.unit @@ -86,8 +86,7 @@ baz` >>> Code spans - 357 ```foo`` <<< -

-
+

```foo``

>>> Code spans - 358 `foo <<< diff --git a/test/gfm/fenced_code_blocks.unit b/test/gfm/fenced_code_blocks.unit index 2e305246..11ab448e 100644 --- a/test/gfm/fenced_code_blocks.unit +++ b/test/gfm/fenced_code_blocks.unit @@ -166,9 +166,8 @@ aaa ``` ``` aaa <<< -
aaa
-
-
+

+aaa

>>> Fenced code blocks - 109 ~~~~~~ aaa @@ -232,9 +231,8 @@ end ``` aa ``` foo <<< -
foo
-
-
+

aa +foo

>>> Fenced code blocks - 116 ~~~ aa ``` ~~~ foo diff --git a/tool/common_mark_stats.json b/tool/common_mark_stats.json index a250f903..26f32eba 100644 --- a/tool/common_mark_stats.json +++ b/tool/common_mark_stats.json @@ -105,7 +105,7 @@ "344": "strict", "345": "strict", "346": "strict", - "347": "fail", + "347": "strict", "348": "strict", "349": "strict" }, @@ -281,14 +281,14 @@ "105": "strict", "106": "strict", "107": "loose", - "108": "fail", + "108": "loose", "109": "loose", "110": "strict", "111": "strict", "112": "strict", "113": "strict", "114": "strict", - "115": "fail", + "115": "strict", "116": "strict", "117": "fail" }, diff --git a/tool/common_mark_stats.txt b/tool/common_mark_stats.txt index 249ed100..4bbd5af7 100644 --- a/tool/common_mark_stats.txt +++ b/tool/common_mark_stats.txt @@ -3,10 +3,10 @@ 12 of 13 – 92.3% Backslash escapes 1 of 1 – 100.0% Blank lines 22 of 25 – 88.0% Block quotes - 21 of 22 – 95.5% Code spans + 22 of 22 – 100.0% Code spans 125 of 131 – 95.4% Emphasis and strong emphasis 14 of 17 – 82.4% Entity and numeric character references - 26 of 29 – 89.7% Fenced code blocks + 28 of 29 – 96.6% Fenced code blocks 15 of 15 – 100.0% Hard line breaks 43 of 43 – 100.0% HTML blocks 21 of 22 – 95.5% Images @@ -24,5 +24,5 @@ 11 of 11 – 100.0% Tabs 3 of 3 – 100.0% Textual content 19 of 19 – 100.0% Thematic breaks - 598 of 649 – 92.1% TOTAL - 525 of 598 – 87.8% TOTAL Strict + 601 of 649 – 92.6% TOTAL + 527 of 601 – 87.7% TOTAL Strict diff --git a/tool/gfm_stats.json b/tool/gfm_stats.json index 243cd6cd..0abf0c33 100644 --- a/tool/gfm_stats.json +++ b/tool/gfm_stats.json @@ -118,7 +118,7 @@ "354": "strict", "355": "strict", "356": "strict", - "357": "fail", + "357": "strict", "358": "strict", "359": "strict" }, @@ -297,14 +297,14 @@ "105": "strict", "106": "strict", "107": "loose", - "108": "fail", + "108": "loose", "109": "loose", "110": "strict", "111": "strict", "112": "strict", "113": "strict", "114": "strict", - "115": "fail", + "115": "strict", "116": "strict", "117": "fail" }, diff --git a/tool/gfm_stats.txt b/tool/gfm_stats.txt index ab2fb170..d471838e 100644 --- a/tool/gfm_stats.txt +++ b/tool/gfm_stats.txt @@ -4,11 +4,11 @@ 12 of 13 – 92.3% Backslash escapes 1 of 1 – 100.0% Blank lines 22 of 25 – 88.0% Block quotes - 21 of 22 – 95.5% Code spans + 22 of 22 – 100.0% Code spans 0 of 1 – 0.0% Disallowed Raw HTML (extension) 125 of 131 – 95.4% Emphasis and strong emphasis 14 of 17 – 82.4% Entity and numeric character references - 26 of 29 – 89.7% Fenced code blocks + 28 of 29 – 96.6% Fenced code blocks 15 of 15 – 100.0% Hard line breaks 43 of 43 – 100.0% HTML blocks 21 of 22 – 95.5% Images @@ -28,5 +28,5 @@ 11 of 11 – 100.0% Tabs 3 of 3 – 100.0% Textual content 19 of 19 – 100.0% Thematic breaks - 613 of 671 – 91.4% TOTAL - 531 of 613 – 86.6% TOTAL Strict + 616 of 671 – 91.8% TOTAL + 533 of 616 – 86.5% TOTAL Strict From e1879364ae3898e8bc5d6696e699513d7f8291f0 Mon Sep 17 00:00:00 2001 From: Sam Rawlins Date: Thu, 12 Sep 2019 08:39:34 -0700 Subject: [PATCH 2/2] Feedback --- lib/src/block_parser.dart | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/src/block_parser.dart b/lib/src/block_parser.dart index b69b7f64..ba072183 100644 --- a/lib/src/block_parser.dart +++ b/lib/src/block_parser.dart @@ -413,7 +413,7 @@ class FencedCodeBlockSyntax extends BlockSyntax { const FencedCodeBlockSyntax(); bool canParse(BlockParser parser) { - var match = pattern.firstMatch(parser.current); + final match = pattern.firstMatch(parser.current); if (match == null) return false; final codeFence = match.group(1); final infoString = match.group(2); @@ -421,11 +421,8 @@ class FencedCodeBlockSyntax extends BlockSyntax { // // > If the info string comes after a backtick fence, it may not contain // > any backtick characters. - if (codeFence.codeUnitAt(0) == $backquote && - infoString.codeUnits.contains($backquote)) { - return false; - } - return true; + return (codeFence.codeUnitAt(0) != $backquote || + !infoString.codeUnits.contains($backquote)); } List parseChildLines(BlockParser parser, [String endBlock]) {