feat(parser): support substitution prevention (#40)

Signed-off-by: Xavier Coulon <[email protected]>
bytesparadise · Nov 5, 2017 · 8e59c45 · 8e59c45
1 parent f7f82e9
commit 8e59c45
Show file tree

Hide file tree

Showing 8 changed files with 1,678 additions and 570 deletions.
diff --git a/README.adoc b/README.adoc
@@ -12,10 +12,10 @@ It is is available under the terms of the https://raw.githubusercontent.com/byte
 * Title and Sections (level 1 to 6)
 * Document attribute declaration (after the title and within the rest of the document) and substitution
 * Paragraphs
-* Delimited Source Blocks (using the \``` delimiter, a.k.a "fences")
+* Delimited Source Blocks (using the `+++```+++` delimiter, a.k.a "fences")
 * Literal blocks (paragraph starting with a space, with the "...." delimiter or with the `[literal]` attribute)
-* Quoted text (+bold+, _italic_ and `monospace`)
-* Unordered lists, using the `-` marker for simple lists, or the `\*` marker for nested lists (and `\**`, `\***`, etc. for the sublists) .
+* Quoted text (+bold+, _italic_ and `monospace`) and substitution prevention using the `\` escape character
+* Unordered lists, using the `-` marker for simple lists, or the `\*` marker for nested lists (and `\**`, `\***`, etc. for the sublists)
 * External links in paragraphs (`https://`, `http://`, `ftp://`, `irc://`, `mailto:`)
 * Inline images in paragraphs (`image:`)
 * Block images (`image::`)
@@ -36,13 +36,13 @@ Libasciidoc provides 2 functions to convert an Asciidoc content into HTML:
 
 1. Converting to a complete HTML document:
 
-    func ConvertToHTML(context.Context, io.Reader, io.Writer, renderer.Options) error
+    func ConvertToHTML(context.Context, io.Reader, io.Writer, renderer.Option...) (map[string]interface{}, error)
 
 2. Converting to a `body` element only:
 
-    func ConvertToHTMLBody(context.Context, io.Reader, io.Writer) (*types.DocumentAttributes, error)
+    func ConvertToHTMLBody(context.Context, io.Reader, io.Writer) (map[string]interface{}, error)
 
-where the returned `types.DocumentAttributes` object contains the document's title (which is not rendered in the HTML's body) and its other attributes.
+where the returned `map[string]interface{}` object contains the document's title (which is not rendered in the HTML's body) and its other attributes.
 
 == How to contribute
 

diff --git a/README.html b/README.html
@@ -32,10 +32,10 @@ <h2 id="_supported_syntax">Supported syntax</h2>
 <p>Literal blocks (paragraph starting with a space, with the "&#8230;&#8203;." delimiter or with the <code>[literal]</code> attribute)</p>
 </li>
 <li>
-<p>Quoted text (bold, <em>italic</em> and <code>monospace</code>)</p>
+<p>Quoted text (bold, <em>italic</em> and <code>monospace</code>) and substitution prevention using the <code>\</code> escape character</p>
 </li>
 <li>
-<p>Unordered lists, using the <code>-</code> marker for simple lists, or the <code>*</code> marker for nested lists (and <code>**</code>, <code>***</code>, etc. for the sublists) .</p>
+<p>Unordered lists, using the <code>-</code> marker for simple lists, or the <code>*</code> marker for nested lists (and <code>**</code>, <code>***</code>, etc. for the sublists)</p>
 </li>
 <li>
 <p>External links in paragraphs (<code>https://</code>, <code>http://</code>, <code>ftp://</code>, <code>irc://</code>, <code>mailto:</code>)</p>
@@ -52,7 +52,7 @@ <h2 id="_supported_syntax">Supported syntax</h2>
 </ul>
 </div>
 <div class="paragraph">
-<p>See the <a href="http://LIMITATIONS.adoc">known limitations</a> document for differences between Asciidoc/Asciidoctor and Libasciidoc.</p>
+<p>See the <a href="http://LIMITATIONS.adoc">known limitations</a> page for differences between Asciidoc/Asciidoctor and Libasciidoc.</p>
 </div>
 <div class="paragraph">
 <p>Further elements will be supported in the future. Feel free to open issues <a href="https://github.com/bytesparadise/libasciidoc/issues">here</a> to help prioritizing the upcoming work.</p>
@@ -81,15 +81,15 @@ <h2 id="_usage">Usage</h2>
 <p>Converting to a complete HTML document:</p>
 <div class="literalblock">
 <div class="content">
-<pre>func ConvertToHTML(context.Context, io.Reader, io.Writer, renderer.Options) error</pre>
+<pre>func ConvertToHTML(context.Context, io.Reader, io.Writer, renderer.Option...) (map[string]interface{}, error)</pre>
 </div>
 </div>
 </li>
 <li>
 <p>Converting to a <code>body</code> element only:</p>
 <div class="literalblock">
 <div class="content">
-<pre>func ConvertToHTMLBody(context.Context, io.Reader, io.Writer) (*types.DocumentAttributes, error)</pre>
+<pre>func ConvertToHTMLBody(context.Context, io.Reader, io.Writer) (map[string]interface{}, error)</pre>
 </div>
 </div>
 </li>
@@ -104,7 +104,7 @@ <h2 id="_usage">Usage</h2>
 <h2 id="_how_to_contribute">How to contribute</h2>
 <div class="sectionbody">
 <div class="paragraph">
-<p>Please refer to the <a href="http://CONTRIBUTE.adoc">Contribute</a> file.</p>
+<p>Please refer to the <a href="http://CONTRIBUTE.adoc">Contribute</a> page.</p>
 </div>
 </div>
 </div>
diff --git a/parser/asciidoc-grammar.peg b/parser/asciidoc-grammar.peg
@@ -199,75 +199,121 @@ ListItem <- WS* level:("*"+ / "-") WS+ content:(ListItemContent) {
 }
 
 ListItemContent <- lines:(!(WS* ("*"+ / "-") WS+) InlineContent EOL)+ { 
-    return types.NewListItemContent(c.text, lines.([]interface{}))
+    return types.NewListItemContent(lines.([]interface{}))
 } 
 // ------------------------------------------
 // Paragraphs
 // ------------------------------------------
 // a paragraph is a group of lines ending with a blank line (or end of file)
 // a paragraph cannot start with the `section` sequence (`= `, `== `, etc.)
 Paragraph <- attributes:(ElementAttribute)* !("="+ WS+) lines:(InlineContent EOL)+ {
-    return types.NewParagraph(c.text, lines.([]interface{}), attributes.([]interface{}))
+    return types.NewParagraph(lines.([]interface{}), attributes.([]interface{}))
 } 
 
 // an inline content element may start with and end with spaces, 
 // but it must contain at least one inline element (image, quoted text, external link, document attribute substitution, word, etc.)
 InlineContent <- !FencedBlockDelimiter elements:(WS* InlineElement WS*)+ &EOL { // needs an "EOL" but does not consume it here.
-    return types.NewInlineContent(c.text, elements.([]interface{}))
+    return types.NewInlineContent(elements.([]interface{}))
 } 
 
 InlineElement <- InlineImage / QuotedText / ExternalLink / DocumentAttributeSubstitution / Word
 
 // ------------------------------------------
 // Quoted Texts (bold, italic and monospace)
 // ------------------------------------------
-QuotedText <- BoldText / ItalicText / MonospaceText
+QuotedText <- BoldText / ItalicText / MonospaceText /
+            EscapedBoldText / EscapedItalicText / EscapedMonospaceText
 
 BoldText <- BoldTextDoublePunctuation / BoldTextUnbalancedPunctuation / BoldTextSimplePunctuation // double punctuation must be evaluated first
 
-BoldTextSimplePunctuation <- "*" content:(QuotedTextContent) "*" {
+BoldTextSimplePunctuation <- !`\` "*" content:(QuotedTextContent) "*" {
     return types.NewQuotedText(types.Bold, content.([]interface{}))
 }
 
-BoldTextDoublePunctuation <- "**" content:(QuotedTextContent) "**" {
+BoldTextDoublePunctuation <- !`\\` "**" content:(QuotedTextContent) "**" {
     return types.NewQuotedText(types.Bold, content.([]interface{}))
 }
 
-BoldTextUnbalancedPunctuation <- "**" content:(QuotedTextContent) "*" { // unbalanced `**` vs `*` punctuation
+BoldTextUnbalancedPunctuation <- !`\\` "**" content:(QuotedTextContent) "*" { // unbalanced `**` vs `*` punctuation
     result := append([]interface{}{"*"}, content.([]interface{}))
     return types.NewQuotedText(types.Bold, result)
 }
 
+EscapedBoldText <- EscapedBoldTextDoublePunctuation / EscapedBoldTextUnbalancedPunctuation / EscapedBoldTextSimplePunctuation // double punctuation must be evaluated first
+
+EscapedBoldTextSimplePunctuation <- backslashes:(`\` `\`*) "*" content:(QuotedTextContent) "*" {
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "*", content.([]interface{}))
+}
+
+EscapedBoldTextDoublePunctuation <- backslashes:(`\\` `\`*) "**" content:(QuotedTextContent) "**" {
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "**", content.([]interface{}))
+}
+
+EscapedBoldTextUnbalancedPunctuation <-  backslashes:(`\` `\`*) "**" content:(QuotedTextContent) "*" { // unbalanced `**` vs `*` punctuation
+    result := append([]interface{}{"*"}, content.([]interface{}))
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "*", result)
+}
+
 ItalicText <- ItalicTextDoublePunctuation / ItalicTextUnbalancedPunctuation / ItalicTextSimplePunctuation
 
-ItalicTextSimplePunctuation <- "_" content:(QuotedTextContent) "_" {
+ItalicTextSimplePunctuation <- !`\` "_" content:(QuotedTextContent) "_" {
     return types.NewQuotedText(types.Italic, content.([]interface{}))
 }
 
-ItalicTextDoublePunctuation <- "__" content:(QuotedTextContent) "__" {
+ItalicTextDoublePunctuation <- !`\\` "__" content:(QuotedTextContent) "__" {
     return types.NewQuotedText(types.Italic, content.([]interface{}))
 }
 
-ItalicTextUnbalancedPunctuation <- "__" content:(QuotedTextContent) "_" { // unbalanced `**` vs `*` punctuation
+ItalicTextUnbalancedPunctuation <- !`\\` "__" content:(QuotedTextContent) "_" { // unbalanced `__` vs `_` punctuation
     result := append([]interface{}{"_"}, content.([]interface{}))
     return types.NewQuotedText(types.Italic, result)
 }
 
+EscapedItalicText <- EscapedItalicTextDoublePunctuation / EscapedItalicTextUnbalancedPunctuation / EscapedItalicTextSimplePunctuation // double punctuation must be evaluated first
+
+EscapedItalicTextSimplePunctuation <- backslashes:(`\` `\`*) "_" content:(QuotedTextContent) "_" {
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "_", content.([]interface{}))
+}
+
+EscapedItalicTextDoublePunctuation <- backslashes:(`\\` `\`*) "__" content:(QuotedTextContent) "__" {
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "__", content.([]interface{}))
+}
+
+EscapedItalicTextUnbalancedPunctuation <-  backslashes:(`\` `\`*) "__" content:(QuotedTextContent) "_" { // unbalanced `__` vs `_` punctuation
+    result := append([]interface{}{"_"}, content.([]interface{}))
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "_", result)
+}
+
 MonospaceText <- MonospaceTextDoublePunctuation / MonospaceTextUnbalancedPunctuation / MonospaceTextSimplePunctuation
 
-MonospaceTextSimplePunctuation <- "`" content:(QuotedTextContent) "`" {
+MonospaceTextSimplePunctuation <- !`\` "`" content:(QuotedTextContent) "`" {
     return types.NewQuotedText(types.Monospace, content.([]interface{}))
 }
 
-MonospaceTextDoublePunctuation <- "``" content:(QuotedTextContent) "``" {
+MonospaceTextDoublePunctuation <- !`\\` "``" content:(QuotedTextContent) "``" {
     return types.NewQuotedText(types.Monospace, content.([]interface{}))
 }
 
-MonospaceTextUnbalancedPunctuation <- "``" content:(QuotedTextContent) "`" { // unbalanced `**` vs `*` punctuation
+MonospaceTextUnbalancedPunctuation <- !`\\` "``" content:(QuotedTextContent) "`" { // unbalanced "``" vs "`" punctuation
     result := append([]interface{}{"`"}, content.([]interface{}))
     return types.NewQuotedText(types.Monospace, result)
 }
 
+EscapedMonospaceText <- EscapedMonospaceTextDoublePunctuation / EscapedMonospaceTextUnbalancedPunctuation / EscapedMonospaceTextSimplePunctuation // double punctuation must be evaluated first
+
+EscapedMonospaceTextSimplePunctuation <- backslashes:(`\` `\`*) "`" content:(QuotedTextContent) "`" {
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "`", content.([]interface{}))
+}
+
+EscapedMonospaceTextDoublePunctuation <- backslashes:(`\\` `\`*) "``" content:(QuotedTextContent) "``" {
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "``", content.([]interface{}))
+}
+
+EscapedMonospaceTextUnbalancedPunctuation <-  backslashes:(`\` `\`*) "``" content:(QuotedTextContent) "`" { // unbalanced "``" vs "`" punctuation
+    result := append([]interface{}{"`"}, content.([]interface{}))
+    return types.NewEscapedQuotedText(backslashes.([]interface{}), "`", result)
+}
+
 QuotedTextContent <- QuotedTextContentElement (WS+ QuotedTextContentElement)*
 
 QuotedTextContentElement <- QuotedText / QuotedTextWord / WordWithQuotePunctuation // word with quote punctuation is only accepted if nothing matched before, so we have a chance to stop
@@ -296,20 +342,20 @@ ExternalLink <- url:(URL_SCHEME URL) text:("[" (URL_TEXT)* "]")? {
 // ------------------------------------------
 BlockImage <- attributes:(ElementAttribute)* image:BlockImageMacro  WS* EOL {
     // here we can ignore the blank line in the returned element
-    return types.NewBlockImage(c.text, *image.(*types.ImageMacro), attributes.([]interface{}))
+    return types.NewBlockImage(*image.(*types.ImageMacro), attributes.([]interface{}))
 }
 
 BlockImageMacro <- "image::" path:(URL) "[" attributes:(URL_TEXT?) "]" {
-    return types.NewImageMacro(c.text, path.(string), attributes)
+    return types.NewImageMacro(path.(string), attributes)
 }
 
 InlineImage <- image:InlineImageMacro {
     // here we can ignore the blank line in the returned element
-    return types.NewInlineImage(c.text, *image.(*types.ImageMacro))
+    return types.NewInlineImage(*image.(*types.ImageMacro))
 }
 
 InlineImageMacro <- "image:" !":" path:(URL) "[" attributes:(URL_TEXT?) "]" {
-    return types.NewImageMacro(c.text, path.(string), attributes)
+    return types.NewImageMacro(path.(string), attributes)
 }
 
 // ------------------------------------------------------------------------------------