From f0ea2573336f783a82a23b46abe43bea5ada3537 Mon Sep 17 00:00:00 2001 From: Jerome Simeon Date: Tue, 30 Jun 2020 16:16:26 -0400 Subject: [PATCH] test(html) Bring test coverage back up for HTML transform - Smarter whitespace rule Signed-off-by: Jerome Simeon --- .../markdown-html/src/HtmlTransformer.test.js | 110 +- .../markdown-html/src/ToCiceroMarkVisitor.js | 20 +- .../markdown-html/src/ToHtmlStringVisitor.js | 81 +- .../HtmlTransformer.test.js.snap | 819 +- packages/markdown-html/src/helpers.js | 8 +- packages/markdown-html/src/rules.js | 108 +- .../test/data/ciceromark/acceptance.html | 26 + .../test/data/ciceromark/acceptance.json | 1 + .../test/data/ciceromark/fixedinterest.html | 11 + .../test/data/ciceromark/fixedinterest.json | 1 + .../test/data/ciceromark/latedelivery.html | 13 + .../test/data/ciceromark/latedelivery.json | 1 + .../test/data/{ => markdown}/blockquote.md | 0 .../data/{ => markdown}/codeblock-info.md | 0 .../test/data/{ => markdown}/codeblock.md | 0 .../test/data/{ => markdown}/emph-strong.md | 0 .../test/data/{ => markdown}/emph.md | 0 .../test/data/{ => markdown}/h1.md | 0 .../test/data/{ => markdown}/h2.md | 0 .../test/data/{ => markdown}/h3.md | 0 .../test/data/{ => markdown}/h4.md | 0 .../test/data/{ => markdown}/h5.md | 0 .../test/data/{ => markdown}/h6.md | 0 .../test/data/{ => markdown}/html-inline.md | 0 .../test/data/{ => markdown}/html-mixed.md | 0 .../test/data/{ => markdown}/inline-code.md | 0 .../test/data/{ => markdown}/link.md | 0 .../{ => markdown}/multiline-html-block.md | 0 .../test/data/{ => markdown}/ol-tight.md | 0 .../test/data/{ => markdown}/ol.md | 0 .../test/data/{ => markdown}/paragraphs.md | 0 .../test/data/{ => markdown}/strong.md | 0 .../test/data/{ => markdown}/text.md | 0 .../test/data/{ => markdown}/thematicbreak.md | 0 .../test/data/{ => markdown}/ul-tight.md | 0 .../test/data/{ => markdown}/ul.md | 0 packages/markdown-html/test/data/spec.txt | 9710 ----------------- .../test/data/acceptance/sample.html | 
24 +- 38 files changed, 1006 insertions(+), 9927 deletions(-) create mode 100644 packages/markdown-html/test/data/ciceromark/acceptance.html create mode 100644 packages/markdown-html/test/data/ciceromark/acceptance.json create mode 100644 packages/markdown-html/test/data/ciceromark/fixedinterest.html create mode 100644 packages/markdown-html/test/data/ciceromark/fixedinterest.json create mode 100644 packages/markdown-html/test/data/ciceromark/latedelivery.html create mode 100644 packages/markdown-html/test/data/ciceromark/latedelivery.json rename packages/markdown-html/test/data/{ => markdown}/blockquote.md (100%) rename packages/markdown-html/test/data/{ => markdown}/codeblock-info.md (100%) rename packages/markdown-html/test/data/{ => markdown}/codeblock.md (100%) rename packages/markdown-html/test/data/{ => markdown}/emph-strong.md (100%) rename packages/markdown-html/test/data/{ => markdown}/emph.md (100%) rename packages/markdown-html/test/data/{ => markdown}/h1.md (100%) rename packages/markdown-html/test/data/{ => markdown}/h2.md (100%) rename packages/markdown-html/test/data/{ => markdown}/h3.md (100%) rename packages/markdown-html/test/data/{ => markdown}/h4.md (100%) rename packages/markdown-html/test/data/{ => markdown}/h5.md (100%) rename packages/markdown-html/test/data/{ => markdown}/h6.md (100%) rename packages/markdown-html/test/data/{ => markdown}/html-inline.md (100%) rename packages/markdown-html/test/data/{ => markdown}/html-mixed.md (100%) rename packages/markdown-html/test/data/{ => markdown}/inline-code.md (100%) rename packages/markdown-html/test/data/{ => markdown}/link.md (100%) rename packages/markdown-html/test/data/{ => markdown}/multiline-html-block.md (100%) rename packages/markdown-html/test/data/{ => markdown}/ol-tight.md (100%) rename packages/markdown-html/test/data/{ => markdown}/ol.md (100%) rename packages/markdown-html/test/data/{ => markdown}/paragraphs.md (100%) rename packages/markdown-html/test/data/{ => 
markdown}/strong.md (100%) rename packages/markdown-html/test/data/{ => markdown}/text.md (100%) rename packages/markdown-html/test/data/{ => markdown}/thematicbreak.md (100%) rename packages/markdown-html/test/data/{ => markdown}/ul-tight.md (100%) rename packages/markdown-html/test/data/{ => markdown}/ul.md (100%) delete mode 100644 packages/markdown-html/test/data/spec.txt diff --git a/packages/markdown-html/src/HtmlTransformer.test.js b/packages/markdown-html/src/HtmlTransformer.test.js index 4ec15d56..e9898a35 100644 --- a/packages/markdown-html/src/HtmlTransformer.test.js +++ b/packages/markdown-html/src/HtmlTransformer.test.js @@ -23,6 +23,17 @@ const HtmlTransformer = require('./HtmlTransformer'); let htmlTransformer = null; let ciceroTransformer = null; +/** + * Prepare the text for parsing (normalizes new lines, etc) + * @param {string} input - the text for the clause + * @return {string} - the normalized text for the clause + */ +function normalizeNLs(input) { + // we replace all \r and \n with \n + let text = input.replace(/\r/gm,''); + return text; +} + // @ts-ignore beforeAll(() => { htmlTransformer = new HtmlTransformer(); @@ -35,11 +46,11 @@ beforeAll(() => { */ function getMarkdownFiles() { const result = []; - const files = fs.readdirSync(__dirname + '/../test/data'); + const files = fs.readdirSync(__dirname + '/../test/data/markdown'); files.forEach(function(file) { if(file.endsWith('.md')) { - let contents = fs.readFileSync(__dirname + '/../test/data/' + file, 'utf8'); + let contents = fs.readFileSync(__dirname + '/../test/data/markdown/' + file, 'utf8'); result.push([file, contents]); } }); @@ -47,51 +58,7 @@ function getMarkdownFiles() { return result; } -/** - * Get the name and contents of all markdown snippets - * used in a commonmark spec file - * @returns {*} an array of name/contents tuples - */ -function getMarkdownSpecFiles() { - const result = []; - const specExamples = extractSpecTests(__dirname + '/../test/data/spec.txt'); - 
specExamples.forEach(function(example) { - result.push([`${example.section}-${example.number}`, example.markdown]); - }); - - return result; -} - -/** - * Extracts all the test md snippets from a commonmark spec file - * @param {string} testfile the file to use - * @return {*} the examples - */ -function extractSpecTests(testfile) { - let data = fs.readFileSync(testfile, 'utf8'); - let examples = []; - let current_section = ''; - let example_number = 0; - let tests = data - .replace(/\r\n?/g, '\n') // Normalize newlines for platform independence - .replace(/^(.|[\n])*/m, ''); - - tests.replace(/^`{32} example\n([\s\S]*?)^\.\n([\s\S]*?)^`{32}$|^#{1,6} *(.*)$/gm, - function(_, markdownSubmatch, htmlSubmatch, sectionSubmatch){ - if (sectionSubmatch) { - current_section = sectionSubmatch; - } else { - example_number++; - examples.push({markdown: markdownSubmatch, - html: htmlSubmatch, - section: current_section, - number: example_number}); - } - }); - return examples; -} - -describe.only('html', () => { +describe('markdown <-> html', () => { getMarkdownFiles().forEach(([file, markdownText], i) => { it(`converts ${file} to html`, () => { const json = ciceroTransformer.fromMarkdown(markdownText, 'json'); @@ -111,13 +78,46 @@ describe.only('html', () => { }); }); -describe('markdown-spec', () => { - getMarkdownSpecFiles().forEach( ([file, markdownText]) => { - it(`converts ${file} to concerto JSON`, () => { - const json = ciceroTransformer.fromMarkdown(markdownText, 'json'); - expect(json).toMatchSnapshot(); // (1) - const html = htmlTransformer.toHtml(json); - expect(html).toMatchSnapshot(); // (2) +/** + * Get the name and contents of all ciceromark test files + * @returns {*} an array of name/contents tuples + */ +function getCiceroMarkFiles() { + const result = []; + const files = fs.readdirSync(__dirname + '/../test/data/ciceromark'); + + files.forEach(function(file) { + if(file.endsWith('.json')) { + let contents = normalizeNLs(fs.readFileSync(__dirname + 
'/../test/data/ciceromark/' + file, 'utf8')); + result.push([file, contents]); + } + }); + + return result; +} + +describe('ciceromark <-> html', () => { + getCiceroMarkFiles().forEach( ([file, jsonText], index) => { + it(`converts ${file} to and from CiceroMark`, () => { + const value = JSON.parse(jsonText); + const html = htmlTransformer.toHtml(value); + + // check no changes to html + expect(html).toMatchSnapshot(); // (1) + + // load expected html + const expectedHtml = normalizeNLs(fs.readFileSync(__dirname + '/../test/data/ciceromark/' + file.replace(/.json$/,'.html'), 'utf8')); + expect(expectedHtml).toMatchSnapshot(); // (2) + + // convert the expected html and compare + const expectedCiceroMarkValue = htmlTransformer.toCiceroMark(expectedHtml); + expect(expectedCiceroMarkValue).toMatchSnapshot(); // (3) + + // check that html created from ciceromark and from the expected html is the same + expect(html).toEqual(expectedHtml); + + // check roundtrip + expect(expectedCiceroMarkValue).toEqual(value); }); }); -}); \ No newline at end of file +}); diff --git a/packages/markdown-html/src/ToCiceroMarkVisitor.js b/packages/markdown-html/src/ToCiceroMarkVisitor.js index 94a46524..4f0e2121 100644 --- a/packages/markdown-html/src/ToCiceroMarkVisitor.js +++ b/packages/markdown-html/src/ToCiceroMarkVisitor.js @@ -51,9 +51,10 @@ class ToCiceroMarkVisitor { * Deserialize a DOM element. 
* * @param {Object} element DOM element + * @param {boolean} ignoreSpace override * @return {Any} node */ - deserializeElement(element) { + deserializeElement(element, ignoreSpace) { let node; //console.log('tagName', element.tagName); @@ -61,16 +62,16 @@ class ToCiceroMarkVisitor { element.tagName = ''; } - const next = elements => { + const next = (elements, ignoreSpace) => { if (Object.prototype.toString.call(elements) === '[object NodeList]') { elements = Array.from(elements); } switch (typeOf(elements)) { case 'array': - return this.deserializeElements(elements); + return this.deserializeElements(elements, ignoreSpace); case 'object': - return this.deserializeElement(elements); + return this.deserializeElement(elements, ignoreSpace); case 'null': case 'undefined': return; @@ -83,7 +84,7 @@ class ToCiceroMarkVisitor { for (const rule of this.rules) { if (!rule.deserialize) {continue;} - const ret = rule.deserialize(element, next); + const ret = rule.deserialize(element, next, ignoreSpace); const type = typeOf(ret); if ( @@ -118,21 +119,22 @@ class ToCiceroMarkVisitor { break; } - return node || next(element.childNodes); + return node || next(element.childNodes, ignoreSpace); } /** * Deserialize an array of DOM elements. 
* * @param {Array} elements DOM elements + * @param {boolean} ignoreSpace override * @return {Array} array of nodes */ - deserializeElements(elements = []) { + deserializeElements(elements = [], ignoreSpace) { let nodes = []; elements.filter(this.cruftNewline).forEach(element => { // console.log('element -- ', element); - const node = this.deserializeElement(element); + const node = this.deserializeElement(element, ignoreSpace); // console.log('node -- ', node); switch (typeOf(node)) { @@ -166,7 +168,7 @@ class ToCiceroMarkVisitor { } const children = Array.from(fragment.childNodes); // console.log('children -- ', children); - const nodes = this.deserializeElements(children); + const nodes = this.deserializeElements(children, true); // console.log('nodes', nodes); return { '$class': `${NS_PREFIX_CommonMarkModel}${'Document'}`, diff --git a/packages/markdown-html/src/ToHtmlStringVisitor.js b/packages/markdown-html/src/ToHtmlStringVisitor.js index ce02e006..ebaf47d9 100644 --- a/packages/markdown-html/src/ToHtmlStringVisitor.js +++ b/packages/markdown-html/src/ToHtmlStringVisitor.js @@ -54,32 +54,6 @@ class ToHtmlStringVisitor { return parameters.result; } - /** - * Set parameters for inner node - * @param {*} parametersOut - the current parameters - * @return {*} the new parameters with first set to true - */ - static mkParametersIn(parametersOut) { - let parameters = {}; - parameters.result = ''; - parameters.first = true; - parameters.indent = parametersOut.indent; // Same indentation - return parameters; - } - - /** - * Set parameters for inner list - * @param {*} parametersOut - the current parameters - * @return {*} the new parameters with first set to true - */ - static mkParametersInList(parametersOut) { - let parameters = {}; - parameters.result = ''; - parameters.first = true; - parameters.indent = parametersOut.indent+1; // Increases indentation - return parameters; - } - /** * Visit a node * @param {*} thing the object being visited @@ -88,31 +62,64 @@ 
class ToHtmlStringVisitor { visit(thing, parameters) { switch(thing.getType()) { - case 'Clause': - // { - // const ciceroMarkTransformer = new CiceroMarkTransformer(); - // console.log(JSON.stringify(ciceroMarkTransformer.getSerializer().toJSON(thing), null, 4)); - // } - parameters.result += `
\n${ToHtmlStringVisitor.visitChildren(this, thing)}
\n`; + case 'Clause': { + let attributes = `class="clause" name="${thing.name}"`; + if (thing.elementType) { + attributes += ` elementType="${thing.elementType}"`; + } + if (thing.src) { + attributes += ` src="${thing.src}"`; + } + parameters.result += `
\n${ToHtmlStringVisitor.visitChildren(this, thing)}
\n`; + } break; case 'Variable': { - parameters.result += `${thing.value}`; + let attributes = `class="variable" name="${thing.name}"`; + if (thing.elementType) { + attributes += ` elementType="${thing.elementType}"`; + } + if (thing.identifiedBy) { + attributes += ` identifiedBy="${thing.identifiedBy}"`; + } + parameters.result += `${thing.value}`; } break; case 'FormattedVariable': { - parameters.result += `${thing.value}`; + let attributes = `class="variable" name="${thing.name}" format="${thing.format}"`; + if (thing.elementType) { + attributes += ` elementType="${thing.elementType}"`; + } + if (thing.identifiedBy) { + attributes += ` identifiedBy="${thing.identifiedBy}"`; + } + parameters.result += `${thing.value}`; } break; case 'EnumVariable': { const enumValues = encodeURIComponent(JSON.stringify(thing.enumValues)); - parameters.result += `${thing.value}`; + let attributes = `class="variable" name="${thing.name}" enumValues="${enumValues}"`; + if (thing.elementType) { + attributes += ` elementType="${thing.elementType}"`; + } + if (thing.identifiedBy) { + attributes += ` identifiedBy="${thing.identifiedBy}"`; + } + parameters.result += `${thing.value}`; } break; case 'Conditional': parameters.result += `${thing.nodes[0].text}`; break; - case 'Formula': - parameters.result += `${thing.value}`; + case 'Formula': { + let attributes = `class="formula" name="${thing.name}"`; + if (thing.code) { + attributes += ` code="${encodeURIComponent(thing.code)}"`; + } + if (thing.dependencies) { + attributes += ` dependencies="${encodeURIComponent(JSON.stringify(thing.dependencies))}"`; + } + parameters.result += `${thing.value}`; + } break; case 'CodeBlock': { const info = thing.info; diff --git a/packages/markdown-html/src/__snapshots__/HtmlTransformer.test.js.snap b/packages/markdown-html/src/__snapshots__/HtmlTransformer.test.js.snap index 490d545f..0b8cd891 100644 --- a/packages/markdown-html/src/__snapshots__/HtmlTransformer.test.js.snap +++ 
b/packages/markdown-html/src/__snapshots__/HtmlTransformer.test.js.snap @@ -1,6 +1,725 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`html converts blockquote.md to html 1`] = ` +exports[`ciceromark <-> html converts acceptance.json to and from CiceroMark 1`] = ` +" + +
+

Heading

+

And below is a clause.

+
+

Acceptance of Delivery.

+

\\"Party A\\" will be deemed to have completed its delivery obligations +if in \\"Party B\\"'s opinion, the \\"Widgets\\" satisfies the +Acceptance Criteria, and \\"Party B\\" notifies \\"Party A\\" in writing +that it is accepting the \\"Widgets\\".

+

Inspection and Notice.

+

\\"Party B\\" will have 10 Business Days to inspect and +evaluate the \\"Widgets\\" on the delivery date before notifying +\\"Party A\\" that it is either accepting or rejecting the +\\"Widgets\\".

+

Acceptance Criteria.

+

The \\"Acceptance Criteria\\" are the specifications the \\"Widgets\\" +must meet for the \\"Party A\\" to comply with its requirements and +obligations under this agreement, detailed in \\"Attachment X\\", attached +to this agreement.

+
+

More text

+
+ +" +`; + +exports[`ciceromark <-> html converts acceptance.json to and from CiceroMark 2`] = ` +" + +
+

Heading

+

And below is a clause.

+
+

Acceptance of Delivery.

+

\\"Party A\\" will be deemed to have completed its delivery obligations +if in \\"Party B\\"'s opinion, the \\"Widgets\\" satisfies the +Acceptance Criteria, and \\"Party B\\" notifies \\"Party A\\" in writing +that it is accepting the \\"Widgets\\".

+

Inspection and Notice.

+

\\"Party B\\" will have 10 Business Days to inspect and +evaluate the \\"Widgets\\" on the delivery date before notifying +\\"Party A\\" that it is either accepting or rejecting the +\\"Widgets\\".

+

Acceptance Criteria.

+

The \\"Acceptance Criteria\\" are the specifications the \\"Widgets\\" +must meet for the \\"Party A\\" to comply with its requirements and +obligations under this agreement, detailed in \\"Attachment X\\", attached +to this agreement.

+
+

More text

+
+ +" +`; + +exports[`ciceromark <-> html converts acceptance.json to and from CiceroMark 3`] = ` +Object { + "$class": "org.accordproject.commonmark.Document", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Heading", + "level": "1", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "Heading", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Paragraph", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "And below is a ", + }, + Object { + "$class": "org.accordproject.commonmark.Strong", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "clause", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": ".", + }, + ], + }, + Object { + "$class": "org.accordproject.ciceromark.Clause", + "elementType": "org.accordproject.acceptanceofdelivery.AcceptanceOfDeliveryClause", + "name": "deliveryClause", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Heading", + "level": "2", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "Acceptance of Delivery.", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Paragraph", + "nodes": Array [ + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "org.accordproject.organization.Organization", + "identifiedBy": "identifier", + "name": "shipper", + "value": "\\"Party A\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " will be deemed to have completed its delivery obligations", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "if in ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "org.accordproject.organization.Organization", + "identifiedBy": "identifier", + "name": "receiver", + 
"value": "\\"Party B\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "'s opinion, the ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "String", + "name": "deliverable", + "value": "\\"Widgets\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " satisfies the", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "Acceptance Criteria, and ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "org.accordproject.organization.Organization", + "identifiedBy": "identifier", + "name": "receiver", + "value": "\\"Party B\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " notifies ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "org.accordproject.organization.Organization", + "identifiedBy": "identifier", + "name": "shipper", + "value": "\\"Party A\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " in writing", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "that it is accepting the ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "String", + "name": "deliverable", + "value": "\\"Widgets\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": ".", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Heading", + "level": "2", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "Inspection and Notice.", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Paragraph", + "nodes": Array [ + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "org.accordproject.organization.Organization", + 
"identifiedBy": "identifier", + "name": "receiver", + "value": "\\"Party B\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " will have ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "Long", + "name": "businessDays", + "value": "10", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " Business Days to inspect and", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "evaluate the ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "String", + "name": "deliverable", + "value": "\\"Widgets\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " on the delivery date before notifying", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "org.accordproject.organization.Organization", + "identifiedBy": "identifier", + "name": "shipper", + "value": "\\"Party A\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " that it is either accepting or rejecting the", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "String", + "name": "deliverable", + "value": "\\"Widgets\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": ".", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Heading", + "level": "2", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "Acceptance Criteria.", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Paragraph", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "The \\"Acceptance Criteria\\" are the specifications the ", + }, + 
Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "String", + "name": "deliverable", + "value": "\\"Widgets\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "must meet for the ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "org.accordproject.organization.Organization", + "identifiedBy": "identifier", + "name": "shipper", + "value": "\\"Party A\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " to comply with its requirements and", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "obligations under this agreement, detailed in ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "String", + "name": "attachment", + "value": "\\"Attachment X\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": ", attached", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "to this agreement.", + }, + ], + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Paragraph", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "More text", + }, + ], + }, + ], + "xmlns": "http://commonmark.org/xml/1.0", +} +`; + +exports[`ciceromark <-> html converts fixedinterest.json to and from CiceroMark 1`] = ` +" + +
+

Fixed rate loan

+

This is a fixed interest loan to the amount of £100,000.00 +at the yearly interest rate of 2.5% +with a loan term of 15, +and monthly payments of eval( monthlyPaymentFormula(loanAmount,rate,loanDuration) as \\"K0,0.00\\" )

+
+ +" +`; + +exports[`ciceromark <-> html converts fixedinterest.json to and from CiceroMark 2`] = ` +" + +
+

Fixed rate loan

+

This is a fixed interest loan to the amount of £100,000.00 +at the yearly interest rate of 2.5% +with a loan term of 15, +and monthly payments of eval( monthlyPaymentFormula(loanAmount,rate,loanDuration) as \\"K0,0.00\\" )

+
+ +" +`; + +exports[`ciceromark <-> html converts fixedinterest.json to and from CiceroMark 3`] = ` +Object { + "$class": "org.accordproject.commonmark.Document", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Heading", + "level": "2", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "Fixed rate loan", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Paragraph", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "This is a ", + }, + Object { + "$class": "org.accordproject.commonmark.Emph", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "fixed interest", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " loan to the amount of ", + }, + Object { + "$class": "org.accordproject.ciceromark.FormattedVariable", + "elementType": "org.accordproject.money.MonetaryAmount", + "format": "K0,0.00", + "name": "loanAmount", + "value": "£100,000.00", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "at the yearly interest rate of ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "Double", + "name": "rate", + "value": "2.5", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "%", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "with a loan term of ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "Integer", + "name": "loanDuration", + "value": "15", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": ",", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "and monthly payments of ", 
+ }, + Object { + "$class": "org.accordproject.ciceromark.Formula", + "code": " monthlyPaymentFormula(loanAmount,rate,loanDuration) as \\"K0,0.00\\" ", + "dependencies": Array [], + "name": "formula", + "value": " eval( monthlyPaymentFormula(loanAmount,rate,loanDuration) as \\"K0,0.00\\" ) ", + }, + ], + }, + ], + "xmlns": "http://commonmark.org/xml/1.0", +} +`; + +exports[`ciceromark <-> html converts latedelivery.json to and from CiceroMark 1`] = ` +" + +
+

Late Delivery and Penalty.

+

In case of delayed delivery except for Force Majeure cases,\\"Dan\\" (the Seller) shall pay to \\"Steve\\" (the Buyer) for every 2 days +of delay penalty amounting to 10.5% of the total value of the Equipment +whose delivery has been delayed. Any fractional part of a days is to be +considered a full days. The total amount of penalty shall not however, +exceed 55.0% of the total value of the Equipment involved in late delivery. +If the delay is more than 15 days, the Buyer is entitled to terminate this Contract.

+
+ +" +`; + +exports[`ciceromark <-> html converts latedelivery.json to and from CiceroMark 2`] = ` +" + +
+

Late Delivery and Penalty.

+

In case of delayed delivery except for Force Majeure cases,\\"Dan\\" (the Seller) shall pay to \\"Steve\\" (the Buyer) for every 2 days +of delay penalty amounting to 10.5% of the total value of the Equipment +whose delivery has been delayed. Any fractional part of a days is to be +considered a full days. The total amount of penalty shall not however, +exceed 55.0% of the total value of the Equipment involved in late delivery. +If the delay is more than 15 days, the Buyer is entitled to terminate this Contract.

+
+ +" +`; + +exports[`ciceromark <-> html converts latedelivery.json to and from CiceroMark 3`] = ` +Object { + "$class": "org.accordproject.commonmark.Document", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Heading", + "level": "2", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "Late Delivery and Penalty.", + }, + ], + }, + Object { + "$class": "org.accordproject.commonmark.Paragraph", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "In case of delayed delivery", + }, + Object { + "$class": "org.accordproject.ciceromark.Conditional", + "isTrue": true, + "name": "forceMajeure", + "nodes": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " except for Force Majeure cases,", + }, + ], + "whenFalse": Array [], + "whenTrue": Array [ + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " except for Force Majeure cases,", + }, + ], + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "String", + "name": "partyId", + "value": "\\"Dan\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " (the Seller) shall pay to ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "String", + "name": "partyId", + "value": "\\"Steve\\"", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " (the Buyer) for every ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "Long", + "name": "amount", + "value": "2", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " ", + }, + Object { + "$class": "org.accordproject.ciceromark.EnumVariable", + "enumValues": Array [ + "seconds", + "minutes", + "hours", + "days", + "weeks", + ], + "name": "unit", + "value": "days", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": 
"org.accordproject.commonmark.Text", + "text": "of delay penalty amounting to ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "Double", + "name": "penaltyPercentage", + "value": "10.5", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "% of the total value of the Equipment", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "whose delivery has been delayed. Any fractional part of a ", + }, + Object { + "$class": "org.accordproject.ciceromark.EnumVariable", + "enumValues": Array [ + "seconds", + "minutes", + "hours", + "days", + "weeks", + ], + "name": "fractionalPart", + "value": "days", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " is to be", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "considered a full ", + }, + Object { + "$class": "org.accordproject.ciceromark.EnumVariable", + "enumValues": Array [ + "seconds", + "minutes", + "hours", + "days", + "weeks", + ], + "name": "fractionalPart", + "value": "days", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": ". 
The total amount of penalty shall not however,", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "exceed ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "Double", + "name": "capPercentage", + "value": "55.0", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "% of the total value of the Equipment involved in late delivery.", + }, + Object { + "$class": "org.accordproject.commonmark.Softbreak", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": "If the delay is more than ", + }, + Object { + "$class": "org.accordproject.ciceromark.Variable", + "elementType": "Long", + "name": "amount", + "value": "15", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": " ", + }, + Object { + "$class": "org.accordproject.ciceromark.EnumVariable", + "enumValues": Array [ + "seconds", + "minutes", + "hours", + "days", + "weeks", + ], + "name": "unit", + "value": "days", + }, + Object { + "$class": "org.accordproject.commonmark.Text", + "text": ", the Buyer is entitled to terminate this Contract.", + }, + ], + }, + ], + "xmlns": "http://commonmark.org/xml/1.0", +} +`; + +exports[`markdown <-> html converts blockquote.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -32,7 +751,7 @@ Object { } `; -exports[`html converts blockquote.md to html 2`] = ` +exports[`markdown <-> html converts blockquote.md to html 2`] = ` "
@@ -44,7 +763,7 @@ exports[`html converts blockquote.md to html 2`] = ` " `; -exports[`html converts codeblock.md to html 1`] = ` +exports[`markdown <-> html converts codeblock.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -60,7 +779,7 @@ block. } `; -exports[`html converts codeblock.md to html 2`] = ` +exports[`markdown <-> html converts codeblock.md to html 2`] = ` "
@@ -73,7 +792,7 @@ block. " `; -exports[`html converts codeblock-info.md to html 1`] = ` +exports[`markdown <-> html converts codeblock-info.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -107,7 +826,7 @@ Object { } `; -exports[`html converts codeblock-info.md to html 2`] = ` +exports[`markdown <-> html converts codeblock-info.md to html 2`] = ` "
@@ -123,7 +842,7 @@ exports[`html converts codeblock-info.md to html 2`] = ` " `; -exports[`html converts emph.md to html 1`] = ` +exports[`markdown <-> html converts emph.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -154,7 +873,7 @@ Object { } `; -exports[`html converts emph.md to html 2`] = ` +exports[`markdown <-> html converts emph.md to html 2`] = ` "
@@ -164,7 +883,7 @@ exports[`html converts emph.md to html 2`] = ` " `; -exports[`html converts emph-strong.md to html 1`] = ` +exports[`markdown <-> html converts emph-strong.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -200,7 +919,7 @@ Object { } `; -exports[`html converts emph-strong.md to html 2`] = ` +exports[`markdown <-> html converts emph-strong.md to html 2`] = ` "
@@ -210,7 +929,7 @@ exports[`html converts emph-strong.md to html 2`] = ` " `; -exports[`html converts h1.md to html 1`] = ` +exports[`markdown <-> html converts h1.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -229,7 +948,7 @@ Object { } `; -exports[`html converts h1.md to html 2`] = ` +exports[`markdown <-> html converts h1.md to html 2`] = ` "
@@ -239,7 +958,7 @@ exports[`html converts h1.md to html 2`] = ` " `; -exports[`html converts h2.md to html 1`] = ` +exports[`markdown <-> html converts h2.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -258,7 +977,7 @@ Object { } `; -exports[`html converts h2.md to html 2`] = ` +exports[`markdown <-> html converts h2.md to html 2`] = ` "
@@ -268,7 +987,7 @@ exports[`html converts h2.md to html 2`] = ` " `; -exports[`html converts h3.md to html 1`] = ` +exports[`markdown <-> html converts h3.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -287,7 +1006,7 @@ Object { } `; -exports[`html converts h3.md to html 2`] = ` +exports[`markdown <-> html converts h3.md to html 2`] = ` "
@@ -297,7 +1016,7 @@ exports[`html converts h3.md to html 2`] = ` " `; -exports[`html converts h4.md to html 1`] = ` +exports[`markdown <-> html converts h4.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -316,7 +1035,7 @@ Object { } `; -exports[`html converts h4.md to html 2`] = ` +exports[`markdown <-> html converts h4.md to html 2`] = ` "
@@ -326,7 +1045,7 @@ exports[`html converts h4.md to html 2`] = ` " `; -exports[`html converts h5.md to html 1`] = ` +exports[`markdown <-> html converts h5.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -345,7 +1064,7 @@ Object { } `; -exports[`html converts h5.md to html 2`] = ` +exports[`markdown <-> html converts h5.md to html 2`] = ` "
@@ -355,7 +1074,7 @@ exports[`html converts h5.md to html 2`] = ` " `; -exports[`html converts h6.md to html 1`] = ` +exports[`markdown <-> html converts h6.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -374,7 +1093,7 @@ Object { } `; -exports[`html converts h6.md to html 2`] = ` +exports[`markdown <-> html converts h6.md to html 2`] = ` "
@@ -384,7 +1103,7 @@ exports[`html converts h6.md to html 2`] = ` " `; -exports[`html converts html-inline.md to html 1`] = ` +exports[`markdown <-> html converts html-inline.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -424,7 +1143,7 @@ Object { } `; -exports[`html converts html-inline.md to html 2`] = ` +exports[`markdown <-> html converts html-inline.md to html 2`] = ` "
@@ -434,7 +1153,7 @@ exports[`html converts html-inline.md to html 2`] = ` " `; -exports[`html converts html-mixed.md to html 1`] = ` +exports[`markdown <-> html converts html-mixed.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -485,7 +1204,7 @@ Object { } `; -exports[`html converts html-mixed.md to html 2`] = ` +exports[`markdown <-> html converts html-mixed.md to html 2`] = ` "
@@ -497,7 +1216,7 @@ exports[`html converts html-mixed.md to html 2`] = ` " `; -exports[`html converts inline-code.md to html 1`] = ` +exports[`markdown <-> html converts inline-code.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -523,7 +1242,7 @@ Object { } `; -exports[`html converts inline-code.md to html 2`] = ` +exports[`markdown <-> html converts inline-code.md to html 2`] = ` "
@@ -533,7 +1252,7 @@ exports[`html converts inline-code.md to html 2`] = ` " `; -exports[`html converts link.md to html 1`] = ` +exports[`markdown <-> html converts link.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -566,7 +1285,7 @@ Object { } `; -exports[`html converts link.md to html 2`] = ` +exports[`markdown <-> html converts link.md to html 2`] = ` "
@@ -576,7 +1295,7 @@ exports[`html converts link.md to html 2`] = ` " `; -exports[`html converts multiline-html-block.md to html 1`] = ` +exports[`markdown <-> html converts multiline-html-block.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -625,7 +1344,7 @@ contents } `; -exports[`html converts multiline-html-block.md to html 2`] = ` +exports[`markdown <-> html converts multiline-html-block.md to html 2`] = ` "
@@ -639,7 +1358,7 @@ contents " `; -exports[`html converts ol.md to html 1`] = ` +exports[`markdown <-> html converts ol.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -717,7 +1436,7 @@ Object { } `; -exports[`html converts ol.md to html 2`] = ` +exports[`markdown <-> html converts ol.md to html 2`] = ` "
@@ -735,7 +1454,7 @@ exports[`html converts ol.md to html 2`] = ` " `; -exports[`html converts ol-tight.md to html 1`] = ` +exports[`markdown <-> html converts ol-tight.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -813,7 +1532,7 @@ Object { } `; -exports[`html converts ol-tight.md to html 2`] = ` +exports[`markdown <-> html converts ol-tight.md to html 2`] = ` "
@@ -831,7 +1550,7 @@ exports[`html converts ol-tight.md to html 2`] = ` " `; -exports[`html converts paragraphs.md to html 1`] = ` +exports[`markdown <-> html converts paragraphs.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -858,7 +1577,7 @@ Object { } `; -exports[`html converts paragraphs.md to html 2`] = ` +exports[`markdown <-> html converts paragraphs.md to html 2`] = ` "
@@ -869,7 +1588,7 @@ exports[`html converts paragraphs.md to html 2`] = ` " `; -exports[`html converts strong.md to html 1`] = ` +exports[`markdown <-> html converts strong.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -900,7 +1619,7 @@ Object { } `; -exports[`html converts strong.md to html 2`] = ` +exports[`markdown <-> html converts strong.md to html 2`] = ` "
@@ -910,7 +1629,7 @@ exports[`html converts strong.md to html 2`] = ` " `; -exports[`html converts text.md to html 1`] = ` +exports[`markdown <-> html converts text.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -928,7 +1647,7 @@ Object { } `; -exports[`html converts text.md to html 2`] = ` +exports[`markdown <-> html converts text.md to html 2`] = ` "
@@ -938,7 +1657,7 @@ exports[`html converts text.md to html 2`] = ` " `; -exports[`html converts thematicbreak.md to html 1`] = ` +exports[`markdown <-> html converts thematicbreak.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -968,7 +1687,7 @@ Object { } `; -exports[`html converts thematicbreak.md to html 2`] = ` +exports[`markdown <-> html converts thematicbreak.md to html 2`] = ` "
@@ -981,7 +1700,7 @@ exports[`html converts thematicbreak.md to html 2`] = ` " `; -exports[`html converts ul.md to html 1`] = ` +exports[`markdown <-> html converts ul.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -1057,7 +1776,7 @@ Object { } `; -exports[`html converts ul.md to html 2`] = ` +exports[`markdown <-> html converts ul.md to html 2`] = ` "
@@ -1075,7 +1794,7 @@ exports[`html converts ul.md to html 2`] = ` " `; -exports[`html converts ul-tight.md to html 1`] = ` +exports[`markdown <-> html converts ul-tight.md to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -1151,7 +1870,7 @@ Object { } `; -exports[`html converts ul-tight.md to html 2`] = ` +exports[`markdown <-> html converts ul-tight.md to html 2`] = ` "
@@ -1169,7 +1888,7 @@ exports[`html converts ul-tight.md to html 2`] = ` " `; -exports[`html converts unwrapped
  • to html 1`] = ` +exports[`markdown <-> html converts unwrapped
  • to html 1`] = ` Object { "$class": "org.accordproject.commonmark.Document", "nodes": Array [ @@ -1205,7 +1924,7 @@ Object { } `; -exports[`html converts unwrapped
  • to html 2`] = ` +exports[`markdown <-> html converts unwrapped
  • to html 2`] = ` "Hello - list item diff --git a/packages/markdown-html/src/helpers.js b/packages/markdown-html/src/helpers.js index 58e5946b..767735a9 100644 --- a/packages/markdown-html/src/helpers.js +++ b/packages/markdown-html/src/helpers.js @@ -32,15 +32,17 @@ function isAllWhitespace( node ) * Determine if a node should be ignored by the iterator functions. * * @param {object} node An object implementing the DOM1 |Node| interface. + * @param {boolean} ignoreSpace override * @return {boolean} true if the node is: * 1) A |Text| node that is all whitespace * 2) A |Comment| node * and otherwise false. */ -function isIgnorable( node ) +function isIgnorable(node, ignoreSpace) { - return ( node.nodeType === 8) || // A comment node - ( (node.nodeType === 3) && isAllWhitespace(node) ); // a text node, all ws + return (ignoreSpace && // Is ignoring space allowed in this context + ((node.nodeType === 8) || // A comment node + (node.nodeType === 3) && isAllWhitespace(node))); // a text node, all ws } module.exports = { diff --git a/packages/markdown-html/src/rules.js b/packages/markdown-html/src/rules.js index e7829106..4df76370 100644 --- a/packages/markdown-html/src/rules.js +++ b/packages/markdown-html/src/rules.js @@ -23,14 +23,14 @@ const { isIgnorable } = require('./helpers'); * @type {Object} */ const TEXT_RULE = { - deserialize(el) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'br') { // add Linebreak node in ciceromark return; } // text nodes will be of type 3 - if (el.nodeType === 3 && !isIgnorable(el)) { + if (el.nodeType === 3 && !isIgnorable(el, ignoreSpace)) { const textArray = el.nodeValue.split('\n'); const textNodes = textArray.map(text => { if (text) { @@ -53,13 +53,13 @@ const TEXT_RULE = { * @type {Object} */ const LIST_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'ul') { return { '$class': `${NS_PREFIX_CommonMarkModel}List`, type: 
'bullet', tight: el.getAttribute('tight') ? el.getAttribute('tight') : true, - nodes: next(el.childNodes) + nodes: next(el.childNodes, ignoreSpace) }; } if (el.tagName && el.tagName.toLowerCase() === 'ol') { @@ -69,7 +69,7 @@ const LIST_RULE = { delimiter: el.getAttribute('delimiter'), start: el.getAttribute('start'), tight: el.getAttribute('tight') ? el.getAttribute('tight') : true, - nodes: next(el.childNodes) + nodes: next(el.childNodes, ignoreSpace) }; } @@ -87,11 +87,11 @@ const LIST_RULE = { * @type {Object} */ const PARAGRAPH_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'p') { return { '$class': `${NS_PREFIX_CommonMarkModel}Paragraph`, - nodes: next(el.childNodes) + nodes: next(el.childNodes, false) }; } } @@ -102,11 +102,11 @@ const PARAGRAPH_RULE = { * @type {Object} */ const STRONG_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'strong') { return { '$class': `${NS_PREFIX_CommonMarkModel}Strong`, - nodes: next(el.childNodes) + nodes: next(el.childNodes, ignoreSpace) }; } } @@ -117,11 +117,11 @@ const STRONG_RULE = { * @type {Object} */ const EMPH_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'em') { return { '$class': `${NS_PREFIX_CommonMarkModel}Emph`, - nodes: next(el.childNodes) + nodes: next(el.childNodes, ignoreSpace) }; } } @@ -132,11 +132,11 @@ const EMPH_RULE = { * @type {Object} */ const LINK_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'a') { return { '$class': `${NS_PREFIX_CommonMarkModel}Link`, - nodes: next(el.childNodes), + nodes: next(el.childNodes, ignoreSpace), destination: el.getAttribute('href') ? el.getAttribute('href') : 'none', title: el.getAttribute('title') ? 
el.getAttribute('title') : '', }; @@ -149,11 +149,11 @@ const LINK_RULE = { * @type {Object} */ const IMAGE_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'img') { return { '$class': `${NS_PREFIX_CommonMarkModel}Image`, - nodes: next(el.childNodes), + nodes: next(el.childNodes, ignoreSpace), destination: el.getAttribute('src') ? el.getAttribute('src') : 'none', title: el.getAttribute('title') ? el.getAttribute('title') : '', }; @@ -166,7 +166,7 @@ const IMAGE_RULE = { * @type {Object} */ const HEADING_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName) { let level; switch (el.tagName.toLowerCase()) { @@ -194,7 +194,7 @@ const HEADING_RULE = { if (level) { return { '$class': `${NS_PREFIX_CommonMarkModel}Heading`, - nodes: next(el.childNodes), + nodes: next(el.childNodes, false), level, }; } @@ -207,7 +207,7 @@ const HEADING_RULE = { * @type {Object} */ const THEMATIC_BREAK_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'hr') { return { '$class': `${NS_PREFIX_CommonMarkModel}ThematicBreak`, @@ -223,7 +223,7 @@ const THEMATIC_BREAK_RULE = { // Look at common mark dingus and see how they are mapping html blocks // TODO: figure out how to handle custom html blocks (could be anything?) 
// const HTML_BLOCK_RULE = { -// deserialize(el, next) { +// deserialize(el, next, ignoreSpace) { // if (el.tagName ) { // return { // '$class': `${NS_PREFIX_CommonMarkModel}HtmlBlock`, @@ -237,7 +237,7 @@ const THEMATIC_BREAK_RULE = { * @type {Object} */ const CODE_BLOCK_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'pre' && el.getAttribute('class') === 'code_block') { const children = el.childNodes; if (children.length === 1 && children[0].tagName.toLowerCase() === 'code') @@ -268,7 +268,7 @@ const CODE_BLOCK_RULE = { * @type {Object} */ const INLINE_CODE_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'code') { { return { @@ -285,11 +285,11 @@ const INLINE_CODE_RULE = { * @type {Object} */ const BLOCK_QUOTE_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'blockquote') { return { '$class': `${NS_PREFIX_CommonMarkModel}BlockQuote`, - nodes: next(el.childNodes) + nodes: next(el.childNodes, ignoreSpace) }; } } @@ -300,15 +300,21 @@ const BLOCK_QUOTE_RULE = { * @type {Object} */ const CLAUSE_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { const tag = el.tagName; if (tag && tag.toLowerCase() === 'div' && el.getAttribute('class') === 'clause') { - return { + const clause = { '$class': `${NS_PREFIX_CiceroMarkModel}Clause`, name: el.getAttribute('name'), - src: el.getAttribute('src'), - nodes: next(el.childNodes) + nodes: next(el.childNodes, false) }; + if (el.getAttribute('elementType')) { + clause.elementType = el.getAttribute('elementType'); + } + if (el.getAttribute('src')) { + clause.src = el.getAttribute('src'); + } + return clause; } } }; @@ -318,30 +324,38 @@ const CLAUSE_RULE = { * @type {Object} */ const VARIABLE_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { const { tagName } = el; if 
(tagName && tagName.toLowerCase() === 'span' && el.getAttribute('class') === 'variable') { + let variable; if (el.getAttribute('format')) { - return { + variable = { '$class': `${NS_PREFIX_CiceroMarkModel}FormattedVariable`, name: el.getAttribute('name'), value: el.textContent, format: el.getAttribute('format') }; } else if (el.getAttribute('enumValues')) { - return { + variable = { '$class': `${NS_PREFIX_CiceroMarkModel}EnumVariable`, name: el.getAttribute('name'), value: el.textContent, enumValues: JSON.parse(decodeURIComponent(el.getAttribute('enumValues'))), }; } else { - return { + variable = { '$class': `${NS_PREFIX_CiceroMarkModel}Variable`, name: el.getAttribute('name'), value: el.textContent, }; } + if (el.getAttribute('elementType')) { + variable.elementType = el.getAttribute('elementType'); + } + if (el.getAttribute('identifiedBy')) { + variable.identifiedBy = el.getAttribute('identifiedBy'); + } + return variable; } } }; @@ -351,7 +365,7 @@ const VARIABLE_RULE = { * @type {Object} */ const CONDITIONAL_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { const { tagName } = el; if (tagName && tagName.toLowerCase() === 'span' && el.getAttribute('class') === 'conditional') { const text = el.textContent; @@ -383,28 +397,21 @@ const CONDITIONAL_RULE = { * @type {Object} */ const FORMULA_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { const { tagName } = el; if (tagName && tagName.toLowerCase() === 'span' && el.getAttribute('class') === 'formula') { - return { + const formula = { '$class': `${NS_PREFIX_CiceroMarkModel}Formula`, name: el.getAttribute('name'), value: el.textContent, }; - } - } -}; - -/** - * A rule to deserialize softbreak nodes. 
- * @type {Object} - */ -const SOFTBREAK_RULE = { - deserialize(el, next) { - if (el.tagName && el.tagName.toLowerCase() === 'wbr') { - return { - '$class': `${NS_PREFIX_CommonMarkModel}Softbreak`, - }; + if (el.getAttribute('code')) { + formula.code = decodeURIComponent(el.getAttribute('code')); + } + if (el.getAttribute('dependencies')) { + formula.dependencies = JSON.parse(decodeURIComponent(el.getAttribute('dependencies'))); + } + return formula; } } }; @@ -414,7 +421,7 @@ const SOFTBREAK_RULE = { * @type {Object} */ const HTML_INLINE_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { const { tagName } = el; if (tagName && tagName.toLowerCase() === 'span' && el.getAttribute('class') === 'html_inline') { { @@ -435,7 +442,7 @@ const HTML_INLINE_RULE = { * @type {Object} */ const HTML_BLOCK_RULE = { - deserialize(el, next) { + deserialize(el, next, ignoreSpace) { if (el.tagName && el.tagName.toLowerCase() === 'pre' && el.getAttribute('class') === 'html_block') { const children = el.childNodes; if (children.length === 1 && children[0].tagName.toLowerCase() === 'code') @@ -466,7 +473,6 @@ const rules = [ CLAUSE_RULE, VARIABLE_RULE, CONDITIONAL_RULE, - SOFTBREAK_RULE, FORMULA_RULE, TEXT_RULE, HTML_INLINE_RULE, diff --git a/packages/markdown-html/test/data/ciceromark/acceptance.html b/packages/markdown-html/test/data/ciceromark/acceptance.html new file mode 100644 index 00000000..ae8e490b --- /dev/null +++ b/packages/markdown-html/test/data/ciceromark/acceptance.html @@ -0,0 +1,26 @@ + + +
    +

    Heading

    +

    And below is a clause.

    +
    +

    Acceptance of Delivery.

    +

    "Party A" will be deemed to have completed its delivery obligations +if in "Party B"'s opinion, the "Widgets" satisfies the +Acceptance Criteria, and "Party B" notifies "Party A" in writing +that it is accepting the "Widgets".

    +

    Inspection and Notice.

    +

    "Party B" will have 10 Business Days to inspect and +evaluate the "Widgets" on the delivery date before notifying +"Party A" that it is either accepting or rejecting the +"Widgets".

    +

    Acceptance Criteria.

    +

    The "Acceptance Criteria" are the specifications the "Widgets" +must meet for the "Party A" to comply with its requirements and +obligations under this agreement, detailed in "Attachment X", attached +to this agreement.

    +
    +

    More text

    +
    + + \ No newline at end of file diff --git a/packages/markdown-html/test/data/ciceromark/acceptance.json b/packages/markdown-html/test/data/ciceromark/acceptance.json new file mode 100644 index 00000000..9354cd86 --- /dev/null +++ b/packages/markdown-html/test/data/ciceromark/acceptance.json @@ -0,0 +1 @@ +{"$class":"org.accordproject.commonmark.Document","xmlns":"http://commonmark.org/xml/1.0","nodes":[{"$class":"org.accordproject.commonmark.Heading","level":"1","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"Heading"}]},{"$class":"org.accordproject.commonmark.Paragraph","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"And below is a "},{"$class":"org.accordproject.commonmark.Strong","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"clause"}]},{"$class":"org.accordproject.commonmark.Text","text":"."}]},{"$class":"org.accordproject.ciceromark.Clause","name":"deliveryClause","elementType":"org.accordproject.acceptanceofdelivery.AcceptanceOfDeliveryClause","nodes":[{"$class":"org.accordproject.commonmark.Heading","level":"2","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"Acceptance of Delivery."}]},{"$class":"org.accordproject.commonmark.Paragraph","nodes":[{"$class":"org.accordproject.ciceromark.Variable","value":"\"Party A\"","identifiedBy":"identifier","name":"shipper","elementType":"org.accordproject.organization.Organization"},{"$class":"org.accordproject.commonmark.Text","text":" will be deemed to have completed its delivery obligations"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"if in "},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Party B\"","identifiedBy":"identifier","name":"receiver","elementType":"org.accordproject.organization.Organization"},{"$class":"org.accordproject.commonmark.Text","text":"'s opinion, the 
"},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Widgets\"","name":"deliverable","elementType":"String"},{"$class":"org.accordproject.commonmark.Text","text":" satisfies the"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"Acceptance Criteria, and "},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Party B\"","identifiedBy":"identifier","name":"receiver","elementType":"org.accordproject.organization.Organization"},{"$class":"org.accordproject.commonmark.Text","text":" notifies "},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Party A\"","identifiedBy":"identifier","name":"shipper","elementType":"org.accordproject.organization.Organization"},{"$class":"org.accordproject.commonmark.Text","text":" in writing"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"that it is accepting the "},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Widgets\"","name":"deliverable","elementType":"String"},{"$class":"org.accordproject.commonmark.Text","text":"."}]},{"$class":"org.accordproject.commonmark.Heading","level":"2","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"Inspection and Notice."}]},{"$class":"org.accordproject.commonmark.Paragraph","nodes":[{"$class":"org.accordproject.ciceromark.Variable","value":"\"Party B\"","identifiedBy":"identifier","name":"receiver","elementType":"org.accordproject.organization.Organization"},{"$class":"org.accordproject.commonmark.Text","text":" will have "},{"$class":"org.accordproject.ciceromark.Variable","value":"10","name":"businessDays","elementType":"Long"},{"$class":"org.accordproject.commonmark.Text","text":" Business Days to inspect and"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"evaluate the 
"},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Widgets\"","name":"deliverable","elementType":"String"},{"$class":"org.accordproject.commonmark.Text","text":" on the delivery date before notifying"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Party A\"","identifiedBy":"identifier","name":"shipper","elementType":"org.accordproject.organization.Organization"},{"$class":"org.accordproject.commonmark.Text","text":" that it is either accepting or rejecting the"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Widgets\"","name":"deliverable","elementType":"String"},{"$class":"org.accordproject.commonmark.Text","text":"."}]},{"$class":"org.accordproject.commonmark.Heading","level":"2","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"Acceptance Criteria."}]},{"$class":"org.accordproject.commonmark.Paragraph","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"The \"Acceptance Criteria\" are the specifications the "},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Widgets\"","name":"deliverable","elementType":"String"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"must meet for the "},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Party A\"","identifiedBy":"identifier","name":"shipper","elementType":"org.accordproject.organization.Organization"},{"$class":"org.accordproject.commonmark.Text","text":" to comply with its requirements and"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"obligations under this agreement, detailed in "},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Attachment X\"","name":"attachment","elementType":"String"},{"$class":"org.accordproject.commonmark.Text","text":", 
attached"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"to this agreement."}]}]},{"$class":"org.accordproject.commonmark.Paragraph","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"More text"}]}]} \ No newline at end of file diff --git a/packages/markdown-html/test/data/ciceromark/fixedinterest.html b/packages/markdown-html/test/data/ciceromark/fixedinterest.html new file mode 100644 index 00000000..fe1bcab5 --- /dev/null +++ b/packages/markdown-html/test/data/ciceromark/fixedinterest.html @@ -0,0 +1,11 @@ + + +
    +

    Fixed rate loan

    +

    This is a fixed interest loan to the amount of £100,000.00 +at the yearly interest rate of 2.5% +with a loan term of 15, +and monthly payments of eval( monthlyPaymentFormula(loanAmount,rate,loanDuration) as "K0,0.00" )

    +
    + + \ No newline at end of file diff --git a/packages/markdown-html/test/data/ciceromark/fixedinterest.json b/packages/markdown-html/test/data/ciceromark/fixedinterest.json new file mode 100644 index 00000000..dd44e4e6 --- /dev/null +++ b/packages/markdown-html/test/data/ciceromark/fixedinterest.json @@ -0,0 +1 @@ +{"$class":"org.accordproject.commonmark.Document","xmlns":"http://commonmark.org/xml/1.0","nodes":[{"$class":"org.accordproject.commonmark.Heading","level":"2","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"Fixed rate loan"}]},{"$class":"org.accordproject.commonmark.Paragraph","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"This is a "},{"$class":"org.accordproject.commonmark.Emph","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"fixed interest"}]},{"$class":"org.accordproject.commonmark.Text","text":" loan to the amount of "},{"$class":"org.accordproject.ciceromark.FormattedVariable","format":"K0,0.00","value":"£100,000.00","name":"loanAmount","elementType":"org.accordproject.money.MonetaryAmount"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"at the yearly interest rate of "},{"$class":"org.accordproject.ciceromark.Variable","value":"2.5","name":"rate","elementType":"Double"},{"$class":"org.accordproject.commonmark.Text","text":"%"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"with a loan term of "},{"$class":"org.accordproject.ciceromark.Variable","value":"15","name":"loanDuration","elementType":"Integer"},{"$class":"org.accordproject.commonmark.Text","text":","},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"and monthly payments of "},{"$class":"org.accordproject.ciceromark.Formula","value":" eval( monthlyPaymentFormula(loanAmount,rate,loanDuration) as \"K0,0.00\" ) ","dependencies":[],"code":" 
monthlyPaymentFormula(loanAmount,rate,loanDuration) as \"K0,0.00\" ","name":"formula"}]}]} \ No newline at end of file diff --git a/packages/markdown-html/test/data/ciceromark/latedelivery.html b/packages/markdown-html/test/data/ciceromark/latedelivery.html new file mode 100644 index 00000000..7d9f7f33 --- /dev/null +++ b/packages/markdown-html/test/data/ciceromark/latedelivery.html @@ -0,0 +1,13 @@ + + +
    +

    Late Delivery and Penalty.

    +

    In case of delayed delivery except for Force Majeure cases,"Dan" (the Seller) shall pay to "Steve" (the Buyer) for every 2 days +of delay penalty amounting to 10.5% of the total value of the Equipment +whose delivery has been delayed. Any fractional part of a days is to be +considered a full days. The total amount of penalty shall not however, +exceed 55.0% of the total value of the Equipment involved in late delivery. +If the delay is more than 15 days, the Buyer is entitled to terminate this Contract.

    +
    + + \ No newline at end of file diff --git a/packages/markdown-html/test/data/ciceromark/latedelivery.json b/packages/markdown-html/test/data/ciceromark/latedelivery.json new file mode 100644 index 00000000..81f63538 --- /dev/null +++ b/packages/markdown-html/test/data/ciceromark/latedelivery.json @@ -0,0 +1 @@ +{"$class":"org.accordproject.commonmark.Document","xmlns":"http://commonmark.org/xml/1.0","nodes":[{"$class":"org.accordproject.commonmark.Heading","level":"2","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"Late Delivery and Penalty."}]},{"$class":"org.accordproject.commonmark.Paragraph","nodes":[{"$class":"org.accordproject.commonmark.Text","text":"In case of delayed delivery"},{"$class":"org.accordproject.ciceromark.Conditional","isTrue":true,"whenTrue":[{"$class":"org.accordproject.commonmark.Text","text":" except for Force Majeure cases,"}],"whenFalse":[],"name":"forceMajeure","nodes":[{"$class":"org.accordproject.commonmark.Text","text":" except for Force Majeure cases,"}]},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Dan\"","name":"partyId","elementType":"String"},{"$class":"org.accordproject.commonmark.Text","text":" (the Seller) shall pay to "},{"$class":"org.accordproject.ciceromark.Variable","value":"\"Steve\"","name":"partyId","elementType":"String"},{"$class":"org.accordproject.commonmark.Text","text":" (the Buyer) for every "},{"$class":"org.accordproject.ciceromark.Variable","value":"2","name":"amount","elementType":"Long"},{"$class":"org.accordproject.commonmark.Text","text":" "},{"$class":"org.accordproject.ciceromark.EnumVariable","enumValues":["seconds","minutes","hours","days","weeks"],"value":"days","name":"unit"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"of delay penalty amounting to 
"},{"$class":"org.accordproject.ciceromark.Variable","value":"10.5","name":"penaltyPercentage","elementType":"Double"},{"$class":"org.accordproject.commonmark.Text","text":"% of the total value of the Equipment"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"whose delivery has been delayed. Any fractional part of a "},{"$class":"org.accordproject.ciceromark.EnumVariable","enumValues":["seconds","minutes","hours","days","weeks"],"value":"days","name":"fractionalPart"},{"$class":"org.accordproject.commonmark.Text","text":" is to be"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"considered a full "},{"$class":"org.accordproject.ciceromark.EnumVariable","enumValues":["seconds","minutes","hours","days","weeks"],"value":"days","name":"fractionalPart"},{"$class":"org.accordproject.commonmark.Text","text":". The total amount of penalty shall not however,"},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"exceed "},{"$class":"org.accordproject.ciceromark.Variable","value":"55.0","name":"capPercentage","elementType":"Double"},{"$class":"org.accordproject.commonmark.Text","text":"% of the total value of the Equipment involved in late delivery."},{"$class":"org.accordproject.commonmark.Softbreak"},{"$class":"org.accordproject.commonmark.Text","text":"If the delay is more than "},{"$class":"org.accordproject.ciceromark.Variable","value":"15","name":"amount","elementType":"Long"},{"$class":"org.accordproject.commonmark.Text","text":" "},{"$class":"org.accordproject.ciceromark.EnumVariable","enumValues":["seconds","minutes","hours","days","weeks"],"value":"days","name":"unit"},{"$class":"org.accordproject.commonmark.Text","text":", the Buyer is entitled to terminate this Contract."}]}]} \ No newline at end of file diff --git a/packages/markdown-html/test/data/blockquote.md 
b/packages/markdown-html/test/data/markdown/blockquote.md similarity index 100% rename from packages/markdown-html/test/data/blockquote.md rename to packages/markdown-html/test/data/markdown/blockquote.md diff --git a/packages/markdown-html/test/data/codeblock-info.md b/packages/markdown-html/test/data/markdown/codeblock-info.md similarity index 100% rename from packages/markdown-html/test/data/codeblock-info.md rename to packages/markdown-html/test/data/markdown/codeblock-info.md diff --git a/packages/markdown-html/test/data/codeblock.md b/packages/markdown-html/test/data/markdown/codeblock.md similarity index 100% rename from packages/markdown-html/test/data/codeblock.md rename to packages/markdown-html/test/data/markdown/codeblock.md diff --git a/packages/markdown-html/test/data/emph-strong.md b/packages/markdown-html/test/data/markdown/emph-strong.md similarity index 100% rename from packages/markdown-html/test/data/emph-strong.md rename to packages/markdown-html/test/data/markdown/emph-strong.md diff --git a/packages/markdown-html/test/data/emph.md b/packages/markdown-html/test/data/markdown/emph.md similarity index 100% rename from packages/markdown-html/test/data/emph.md rename to packages/markdown-html/test/data/markdown/emph.md diff --git a/packages/markdown-html/test/data/h1.md b/packages/markdown-html/test/data/markdown/h1.md similarity index 100% rename from packages/markdown-html/test/data/h1.md rename to packages/markdown-html/test/data/markdown/h1.md diff --git a/packages/markdown-html/test/data/h2.md b/packages/markdown-html/test/data/markdown/h2.md similarity index 100% rename from packages/markdown-html/test/data/h2.md rename to packages/markdown-html/test/data/markdown/h2.md diff --git a/packages/markdown-html/test/data/h3.md b/packages/markdown-html/test/data/markdown/h3.md similarity index 100% rename from packages/markdown-html/test/data/h3.md rename to packages/markdown-html/test/data/markdown/h3.md diff --git 
a/packages/markdown-html/test/data/h4.md b/packages/markdown-html/test/data/markdown/h4.md similarity index 100% rename from packages/markdown-html/test/data/h4.md rename to packages/markdown-html/test/data/markdown/h4.md diff --git a/packages/markdown-html/test/data/h5.md b/packages/markdown-html/test/data/markdown/h5.md similarity index 100% rename from packages/markdown-html/test/data/h5.md rename to packages/markdown-html/test/data/markdown/h5.md diff --git a/packages/markdown-html/test/data/h6.md b/packages/markdown-html/test/data/markdown/h6.md similarity index 100% rename from packages/markdown-html/test/data/h6.md rename to packages/markdown-html/test/data/markdown/h6.md diff --git a/packages/markdown-html/test/data/html-inline.md b/packages/markdown-html/test/data/markdown/html-inline.md similarity index 100% rename from packages/markdown-html/test/data/html-inline.md rename to packages/markdown-html/test/data/markdown/html-inline.md diff --git a/packages/markdown-html/test/data/html-mixed.md b/packages/markdown-html/test/data/markdown/html-mixed.md similarity index 100% rename from packages/markdown-html/test/data/html-mixed.md rename to packages/markdown-html/test/data/markdown/html-mixed.md diff --git a/packages/markdown-html/test/data/inline-code.md b/packages/markdown-html/test/data/markdown/inline-code.md similarity index 100% rename from packages/markdown-html/test/data/inline-code.md rename to packages/markdown-html/test/data/markdown/inline-code.md diff --git a/packages/markdown-html/test/data/link.md b/packages/markdown-html/test/data/markdown/link.md similarity index 100% rename from packages/markdown-html/test/data/link.md rename to packages/markdown-html/test/data/markdown/link.md diff --git a/packages/markdown-html/test/data/multiline-html-block.md b/packages/markdown-html/test/data/markdown/multiline-html-block.md similarity index 100% rename from packages/markdown-html/test/data/multiline-html-block.md rename to 
packages/markdown-html/test/data/markdown/multiline-html-block.md diff --git a/packages/markdown-html/test/data/ol-tight.md b/packages/markdown-html/test/data/markdown/ol-tight.md similarity index 100% rename from packages/markdown-html/test/data/ol-tight.md rename to packages/markdown-html/test/data/markdown/ol-tight.md diff --git a/packages/markdown-html/test/data/ol.md b/packages/markdown-html/test/data/markdown/ol.md similarity index 100% rename from packages/markdown-html/test/data/ol.md rename to packages/markdown-html/test/data/markdown/ol.md diff --git a/packages/markdown-html/test/data/paragraphs.md b/packages/markdown-html/test/data/markdown/paragraphs.md similarity index 100% rename from packages/markdown-html/test/data/paragraphs.md rename to packages/markdown-html/test/data/markdown/paragraphs.md diff --git a/packages/markdown-html/test/data/strong.md b/packages/markdown-html/test/data/markdown/strong.md similarity index 100% rename from packages/markdown-html/test/data/strong.md rename to packages/markdown-html/test/data/markdown/strong.md diff --git a/packages/markdown-html/test/data/text.md b/packages/markdown-html/test/data/markdown/text.md similarity index 100% rename from packages/markdown-html/test/data/text.md rename to packages/markdown-html/test/data/markdown/text.md diff --git a/packages/markdown-html/test/data/thematicbreak.md b/packages/markdown-html/test/data/markdown/thematicbreak.md similarity index 100% rename from packages/markdown-html/test/data/thematicbreak.md rename to packages/markdown-html/test/data/markdown/thematicbreak.md diff --git a/packages/markdown-html/test/data/ul-tight.md b/packages/markdown-html/test/data/markdown/ul-tight.md similarity index 100% rename from packages/markdown-html/test/data/ul-tight.md rename to packages/markdown-html/test/data/markdown/ul-tight.md diff --git a/packages/markdown-html/test/data/ul.md b/packages/markdown-html/test/data/markdown/ul.md similarity index 100% rename from 
packages/markdown-html/test/data/ul.md rename to packages/markdown-html/test/data/markdown/ul.md diff --git a/packages/markdown-html/test/data/spec.txt b/packages/markdown-html/test/data/spec.txt deleted file mode 100644 index 3913de44..00000000 --- a/packages/markdown-html/test/data/spec.txt +++ /dev/null @@ -1,9710 +0,0 @@ ---- -title: CommonMark Spec -author: John MacFarlane -version: 0.29 -date: '2019-04-06' -license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' -... - -# Introduction - -## What is Markdown? - -Markdown is a plain text format for writing structured documents, -based on conventions for indicating formatting in email -and usenet posts. It was developed by John Gruber (with -help from Aaron Swartz) and released in 2004 in the form of a -[syntax description](http://daringfireball.net/projects/markdown/syntax) -and a Perl script (`Markdown.pl`) for converting Markdown to -HTML. In the next decade, dozens of implementations were -developed in many languages. Some extended the original -Markdown syntax with conventions for footnotes, tables, and -other document elements. Some allowed Markdown documents to be -rendered in formats other than HTML. Websites like Reddit, -StackOverflow, and GitHub had millions of people using Markdown. -And Markdown started to be used beyond the web, to author books, -articles, slide shows, letters, and lecture notes. - -What distinguishes Markdown from many other lightweight markup -syntaxes, which are often easier to write, is its readability. -As Gruber writes: - -> The overriding design goal for Markdown's formatting syntax is -> to make it as readable as possible. The idea is that a -> Markdown-formatted document should be publishable as-is, as -> plain text, without looking like it's been marked up with tags -> or formatting instructions. -> () - -The point can be illustrated by comparing a sample of -[AsciiDoc](http://www.methods.co.nz/asciidoc/) with -an equivalent sample of Markdown. 
Here is a sample of -AsciiDoc from the AsciiDoc manual: - -``` -1. List item one. -+ -List item one continued with a second paragraph followed by an -Indented block. -+ -................. -$ ls *.sh -$ mv *.sh ~/tmp -................. -+ -List item continued with a third paragraph. - -2. List item two continued with an open block. -+ --- -This paragraph is part of the preceding list item. - -a. This list is nested and does not require explicit item -continuation. -+ -This paragraph is part of the preceding list item. - -b. List item b. - -This paragraph belongs to item two of the outer list. --- -``` - -And here is the equivalent in Markdown: -``` -1. List item one. - - List item one continued with a second paragraph followed by an - Indented block. - - $ ls *.sh - $ mv *.sh ~/tmp - - List item continued with a third paragraph. - -2. List item two continued with an open block. - - This paragraph is part of the preceding list item. - - 1. This list is nested and does not require explicit item continuation. - - This paragraph is part of the preceding list item. - - 2. List item b. - - This paragraph belongs to item two of the outer list. -``` - -The AsciiDoc version is, arguably, easier to write. You don't need -to worry about indentation. But the Markdown version is much easier -to read. The nesting of list items is apparent to the eye in the -source, not just in the processed document. - -## Why is a spec needed? - -John Gruber's [canonical description of Markdown's -syntax](http://daringfireball.net/projects/markdown/syntax) -does not specify the syntax unambiguously. Here are some examples of -questions it does not answer: - -1. How much indentation is needed for a sublist? The spec says that - continuation paragraphs need to be indented four spaces, but is - not fully explicit about sublists. It is natural to think that - they, too, must be indented four spaces, but `Markdown.pl` does - not require that. 
This is hardly a "corner case," and divergences - between implementations on this issue often lead to surprises for - users in real documents. (See [this comment by John - Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) - -2. Is a blank line needed before a block quote or heading? - Most implementations do not require the blank line. However, - this can lead to unexpected results in hard-wrapped text, and - also to ambiguities in parsing (note that some implementations - put the heading inside the blockquote, while others do not). - (John Gruber has also spoken [in favor of requiring the blank - lines](http://article.gmane.org/gmane.text.markdown.general/2146).) - -3. Is a blank line needed before an indented code block? - (`Markdown.pl` requires it, but this is not mentioned in the - documentation, and some implementations do not require it.) - - ``` markdown - paragraph - code? - ``` - -4. What is the exact rule for determining when list items get - wrapped in `

    ` tags? Can a list be partially "loose" and partially - "tight"? What should we do with a list like this? - - ``` markdown - 1. one - - 2. two - 3. three - ``` - - Or this? - - ``` markdown - 1. one - - a - - - b - 2. two - ``` - - (There are some relevant comments by John Gruber - [here](http://article.gmane.org/gmane.text.markdown.general/2554).) - -5. Can list markers be indented? Can ordered list markers be right-aligned? - - ``` markdown - 8. item 1 - 9. item 2 - 10. item 2a - ``` - -6. Is this one list with a thematic break in its second item, - or two lists separated by a thematic break? - - ``` markdown - * a - * * * * * - * b - ``` - -7. When list markers change from numbers to bullets, do we have - two lists or one? (The Markdown syntax description suggests two, - but the perl scripts and many other implementations produce one.) - - ``` markdown - 1. fee - 2. fie - - foe - - fum - ``` - -8. What are the precedence rules for the markers of inline structure? - For example, is the following a valid link, or does the code span - take precedence ? - - ``` markdown - [a backtick (`)](/url) and [another backtick (`)](/url). - ``` - -9. What are the precedence rules for markers of emphasis and strong - emphasis? For example, how should the following be parsed? - - ``` markdown - *foo *bar* baz* - ``` - -10. What are the precedence rules between block-level and inline-level - structure? For example, how should the following be parsed? - - ``` markdown - - `a long code span can contain a hyphen like this - - and it can screw things up` - ``` - -11. Can list items include section headings? (`Markdown.pl` does not - allow this, but does allow blockquotes to include headings.) - - ``` markdown - - # Heading - ``` - -12. Can list items be empty? - - ``` markdown - * a - * - * b - ``` - -13. Can link references be defined inside block quotes or list items? - - ``` markdown - > Blockquote [foo]. - > - > [foo]: /url - ``` - -14. 
If there are multiple definitions for the same reference, which takes - precedence? - - ``` markdown - [foo]: /url1 - [foo]: /url2 - - [foo][] - ``` - -In the absence of a spec, early implementers consulted `Markdown.pl` -to resolve these ambiguities. But `Markdown.pl` was quite buggy, and -gave manifestly bad results in many cases, so it was not a -satisfactory replacement for a spec. - -Because there is no unambiguous spec, implementations have diverged -considerably. As a result, users are often surprised to find that -a document that renders one way on one system (say, a GitHub wiki) -renders differently on another (say, converting to docbook using -pandoc). To make matters worse, because nothing in Markdown counts -as a "syntax error," the divergence often isn't discovered right away. - -## About this document - -This document attempts to specify Markdown syntax unambiguously. -It contains many examples with side-by-side Markdown and -HTML. These are intended to double as conformance tests. An -accompanying script `spec_tests.py` can be used to run the tests -against any Markdown program: - - python test/spec_tests.py --spec spec.txt --program PROGRAM - -Since this document describes how Markdown is to be parsed into -an abstract syntax tree, it would have made sense to use an abstract -representation of the syntax tree instead of HTML. But HTML is capable -of representing the structural distinctions we need to make, and the -choice of HTML for the tests makes it possible to run the tests against -an implementation without writing an abstract syntax tree renderer. - -This document is generated from a text file, `spec.txt`, written -in Markdown with a small extension for the side-by-side tests. -The script `tools/makespec.py` can be used to convert `spec.txt` into -HTML or CommonMark (which can then be converted into other formats). - -In the examples, the `→` character is used to represent tabs. 
- -# Preliminaries - -## Characters and lines - -Any sequence of [characters] is a valid CommonMark -document. - -A [character](@) is a Unicode code point. Although some -code points (for example, combining accents) do not correspond to -characters in an intuitive sense, all code points count as characters -for purposes of this spec. - -This spec does not specify an encoding; it thinks of lines as composed -of [characters] rather than bytes. A conforming parser may be limited -to a certain encoding. - -A [line](@) is a sequence of zero or more [characters] -other than newline (`U+000A`) or carriage return (`U+000D`), -followed by a [line ending] or by the end of file. - -A [line ending](@) is a newline (`U+000A`), a carriage return -(`U+000D`) not followed by a newline, or a carriage return and a -following newline. - -A line containing no characters, or a line containing only spaces -(`U+0020`) or tabs (`U+0009`), is called a [blank line](@). - -The following definitions of character classes will be used in this spec: - -A [whitespace character](@) is a space -(`U+0020`), tab (`U+0009`), newline (`U+000A`), line tabulation (`U+000B`), -form feed (`U+000C`), or carriage return (`U+000D`). - -[Whitespace](@) is a sequence of one or more [whitespace -characters]. - -A [Unicode whitespace character](@) is -any code point in the Unicode `Zs` general category, or a tab (`U+0009`), -carriage return (`U+000D`), newline (`U+000A`), or form feed -(`U+000C`). - -[Unicode whitespace](@) is a sequence of one -or more [Unicode whitespace characters]. - -A [space](@) is `U+0020`. - -A [non-whitespace character](@) is any character -that is not a [whitespace character]. - -An [ASCII punctuation character](@) -is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, -`*`, `+`, `,`, `-`, `.`, `/` (U+0021–2F), -`:`, `;`, `<`, `=`, `>`, `?`, `@` (U+003A–0040), -`[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), -`{`, `|`, `}`, or `~` (U+007B–007E). 
- -A [punctuation character](@) is an [ASCII -punctuation character] or anything in -the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. - -## Tabs - -Tabs in lines are not expanded to [spaces]. However, -in contexts where whitespace helps to define block structure, -tabs behave as if they were replaced by spaces with a tab stop -of 4 characters. - -Thus, for example, a tab can be used instead of four spaces -in an indented code block. (Note, however, that internal -tabs are passed through as literal tabs, not expanded to -spaces.) - -```````````````````````````````` example -→foo→baz→→bim -. -

    foo→baz→→bim
    -
    -```````````````````````````````` - -```````````````````````````````` example - →foo→baz→→bim -. -
    foo→baz→→bim
    -
    -```````````````````````````````` - -```````````````````````````````` example - a→a - ὐ→a -. -
    a→a
    -ὐ→a
    -
    -```````````````````````````````` - -In the following example, a continuation paragraph of a list -item is indented with a tab; this has exactly the same effect -as indentation with four spaces would: - -```````````````````````````````` example - - foo - -→bar -. -
      -
    • -

      foo

      -

      bar

      -
    • -
    -```````````````````````````````` - -```````````````````````````````` example -- foo - -→→bar -. -
      -
    • -

      foo

      -
        bar
      -
      -
    • -
    -```````````````````````````````` - -Normally the `>` that begins a block quote may be followed -optionally by a space, which is not considered part of the -content. In the following case `>` is followed by a tab, -which is treated as if it were expanded into three spaces. -Since one of these spaces is considered part of the -delimiter, `foo` is considered to be indented six spaces -inside the block quote context, so we get an indented -code block starting with two spaces. - -```````````````````````````````` example ->→→foo -. -
    -
      foo
    -
    -
    -```````````````````````````````` - -```````````````````````````````` example --→→foo -. -
      -
    • -
        foo
      -
      -
    • -
    -```````````````````````````````` - - -```````````````````````````````` example - foo -→bar -. -
    foo
    -bar
    -
    -```````````````````````````````` - -```````````````````````````````` example - - foo - - bar -→ - baz -. -
      -
    • foo -
        -
      • bar -
          -
        • baz
        • -
        -
      • -
      -
    • -
    -```````````````````````````````` - -```````````````````````````````` example -#→Foo -. -

    Foo

    -```````````````````````````````` - -```````````````````````````````` example -*→*→*→ -. -
    -```````````````````````````````` - - -## Insecure characters - -For security reasons, the Unicode character `U+0000` must be replaced -with the REPLACEMENT CHARACTER (`U+FFFD`). - -# Blocks and inlines - -We can think of a document as a sequence of -[blocks](@)---structural elements like paragraphs, block -quotations, lists, headings, rules, and code blocks. Some blocks (like -block quotes and list items) contain other blocks; others (like -headings and paragraphs) contain [inline](@) content---text, -links, emphasized text, images, code spans, and so on. - -## Precedence - -Indicators of block structure always take precedence over indicators -of inline structure. So, for example, the following is a list with -two items, not a list with one item containing a code span: - -```````````````````````````````` example -- `one -- two` -. -
      -
    • `one
    • -
    • two`
    • -
    -```````````````````````````````` - - -This means that parsing can proceed in two steps: first, the block -structure of the document can be discerned; second, text lines inside -paragraphs, headings, and other block constructs can be parsed for inline -structure. The second step requires information about link reference -definitions that will be available only at the end of the first -step. Note that the first step requires processing lines in sequence, -but the second can be parallelized, since the inline parsing of -one block element does not affect the inline parsing of any other. - -## Container blocks and leaf blocks - -We can divide blocks into two types: -[container blocks](@), -which can contain other blocks, and [leaf blocks](@), -which cannot. - -# Leaf blocks - -This section describes the different kinds of leaf block that make up a -Markdown document. - -## Thematic breaks - -A line consisting of 0-3 spaces of indentation, followed by a sequence -of three or more matching `-`, `_`, or `*` characters, each followed -optionally by any number of spaces or tabs, forms a -[thematic break](@). - -```````````````````````````````` example -*** ---- -___ -. -
    -
    -
    -```````````````````````````````` - - -Wrong characters: - -```````````````````````````````` example -+++ -. -

    +++

    -```````````````````````````````` - - -```````````````````````````````` example -=== -. -

    ===

    -```````````````````````````````` - - -Not enough characters: - -```````````````````````````````` example --- -** -__ -. -

    -- -** -__

    -```````````````````````````````` - - -One to three spaces indent are allowed: - -```````````````````````````````` example - *** - *** - *** -. -
    -
    -
    -```````````````````````````````` - - -Four spaces is too many: - -```````````````````````````````` example - *** -. -
    ***
    -
    -```````````````````````````````` - - -```````````````````````````````` example -Foo - *** -. -

    Foo -***

    -```````````````````````````````` - - -More than three characters may be used: - -```````````````````````````````` example -_____________________________________ -. -
    -```````````````````````````````` - - -Spaces are allowed between the characters: - -```````````````````````````````` example - - - - -. -
    -```````````````````````````````` - - -```````````````````````````````` example - ** * ** * ** * ** -. -
    -```````````````````````````````` - - -```````````````````````````````` example -- - - - -. -
    -```````````````````````````````` - - -Spaces are allowed at the end: - -```````````````````````````````` example -- - - - -. -
    -```````````````````````````````` - - -However, no other characters may occur in the line: - -```````````````````````````````` example -_ _ _ _ a - -a------ - ----a--- -. -

    _ _ _ _ a

    -

    a------

    -

    ---a---

    -```````````````````````````````` - - -It is required that all of the [non-whitespace characters] be the same. -So, this is not a thematic break: - -```````````````````````````````` example - *-* -. -

    -

    -```````````````````````````````` - - -Thematic breaks do not need blank lines before or after: - -```````````````````````````````` example -- foo -*** -- bar -. -
      -
    • foo
    • -
    -
    -
      -
    • bar
    • -
    -```````````````````````````````` - - -Thematic breaks can interrupt a paragraph: - -```````````````````````````````` example -Foo -*** -bar -. -

    Foo

    -
    -

    bar

    -```````````````````````````````` - - -If a line of dashes that meets the above conditions for being a -thematic break could also be interpreted as the underline of a [setext -heading], the interpretation as a -[setext heading] takes precedence. Thus, for example, -this is a setext heading, not a paragraph followed by a thematic break: - -```````````````````````````````` example -Foo ---- -bar -. -

    Foo

    -

    bar

    -```````````````````````````````` - - -When both a thematic break and a list item are possible -interpretations of a line, the thematic break takes precedence: - -```````````````````````````````` example -* Foo -* * * -* Bar -. -
      -
    • Foo
    • -
    -
    -
      -
    • Bar
    • -
    -```````````````````````````````` - - -If you want a thematic break in a list item, use a different bullet: - -```````````````````````````````` example -- Foo -- * * * -. -
      -
    • Foo
    • -
    • -
      -
    • -
    -```````````````````````````````` - - -## ATX headings - -An [ATX heading](@) -consists of a string of characters, parsed as inline content, between an -opening sequence of 1--6 unescaped `#` characters and an optional -closing sequence of any number of unescaped `#` characters. -The opening sequence of `#` characters must be followed by a -[space] or by the end of line. The optional closing sequence of `#`s must be -preceded by a [space] and may be followed by spaces only. The opening -`#` character may be indented 0-3 spaces. The raw contents of the -heading are stripped of leading and trailing spaces before being parsed -as inline content. The heading level is equal to the number of `#` -characters in the opening sequence. - -Simple headings: - -```````````````````````````````` example -# foo -## foo -### foo -#### foo -##### foo -###### foo -. -

    foo

    -

    foo

    -

    foo

    -

    foo

    -
    foo
    -
    foo
    -```````````````````````````````` - - -More than six `#` characters is not a heading: - -```````````````````````````````` example -####### foo -. -

    ####### foo

    -```````````````````````````````` - - -At least one space is required between the `#` characters and the -heading's contents, unless the heading is empty. Note that many -implementations currently do not require the space. However, the -space was required by the -[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py), -and it helps prevent things like the following from being parsed as -headings: - -```````````````````````````````` example -#5 bolt - -#hashtag -. -

    #5 bolt

    -

    #hashtag

    -```````````````````````````````` - - -This is not a heading, because the first `#` is escaped: - -```````````````````````````````` example -\## foo -. -

    ## foo

    -```````````````````````````````` - - -Contents are parsed as inlines: - -```````````````````````````````` example -# foo *bar* \*baz\* -. -

    foo bar *baz*

    -```````````````````````````````` - - -Leading and trailing [whitespace] is ignored in parsing inline content: - -```````````````````````````````` example -# foo -. -

    foo

    -```````````````````````````````` - - -One to three spaces indentation are allowed: - -```````````````````````````````` example - ### foo - ## foo - # foo -. -

    foo

    -

    foo

    -

    foo

    -```````````````````````````````` - - -Four spaces are too much: - -```````````````````````````````` example - # foo -. -
    # foo
    -
    -```````````````````````````````` - - -```````````````````````````````` example -foo - # bar -. -

    foo -# bar

    -```````````````````````````````` - - -A closing sequence of `#` characters is optional: - -```````````````````````````````` example -## foo ## - ### bar ### -. -

    foo

    -

    bar

    -```````````````````````````````` - - -It need not be the same length as the opening sequence: - -```````````````````````````````` example -# foo ################################## -##### foo ## -. -

    foo

    -
    foo
    -```````````````````````````````` - - -Spaces are allowed after the closing sequence: - -```````````````````````````````` example -### foo ### -. -

    foo

    -```````````````````````````````` - - -A sequence of `#` characters with anything but [spaces] following it -is not a closing sequence, but counts as part of the contents of the -heading: - -```````````````````````````````` example -### foo ### b -. -

    foo ### b

    -```````````````````````````````` - - -The closing sequence must be preceded by a space: - -```````````````````````````````` example -# foo# -. -

    foo#

    -```````````````````````````````` - - -Backslash-escaped `#` characters do not count as part -of the closing sequence: - -```````````````````````````````` example -### foo \### -## foo #\## -# foo \# -. -

    foo ###

    -

    foo ###

    -

    foo #

    -```````````````````````````````` - - -ATX headings need not be separated from surrounding content by blank -lines, and they can interrupt paragraphs: - -```````````````````````````````` example -**** -## foo -**** -. -
    -

    foo

    -
    -```````````````````````````````` - - -```````````````````````````````` example -Foo bar -# baz -Bar foo -. -

    Foo bar

    -

    baz

    -

    Bar foo

    -```````````````````````````````` - - -ATX headings can be empty: - -```````````````````````````````` example -## -# -### ### -. -

    -

    -

    -```````````````````````````````` - - -## Setext headings - -A [setext heading](@) consists of one or more -lines of text, each containing at least one [non-whitespace -character], with no more than 3 spaces indentation, followed by -a [setext heading underline]. The lines of text must be such -that, were they not followed by the setext heading underline, -they would be interpreted as a paragraph: they cannot be -interpretable as a [code fence], [ATX heading][ATX headings], -[block quote][block quotes], [thematic break][thematic breaks], -[list item][list items], or [HTML block][HTML blocks]. - -A [setext heading underline](@) is a sequence of -`=` characters or a sequence of `-` characters, with no more than 3 -spaces indentation and any number of trailing spaces. If a line -containing a single `-` can be interpreted as an -empty [list item][list items], it should be interpreted this way -and not as a [setext heading underline]. - -The heading is a level 1 heading if `=` characters are used in -the [setext heading underline], and a level 2 heading if `-` -characters are used. The contents of the heading are the result -of parsing the preceding lines of text as CommonMark inline -content. - -In general, a setext heading need not be preceded or followed by a -blank line. However, it cannot interrupt a paragraph, so when a -setext heading comes after a paragraph, a blank line is needed between -them. - -Simple examples: - -```````````````````````````````` example -Foo *bar* -========= - -Foo *bar* ---------- -. -

    Foo bar

    -

    Foo bar

    -```````````````````````````````` - - -The content of the header may span more than one line: - -```````````````````````````````` example -Foo *bar -baz* -==== -. -

    Foo bar -baz

    -```````````````````````````````` - -The contents are the result of parsing the heading's raw -content as inlines. The heading's raw content is formed by -concatenating the lines and removing initial and final -[whitespace]. - -```````````````````````````````` example - Foo *bar -baz*→ -==== -. -

    Foo bar -baz

    -```````````````````````````````` - - -The underlining can be any length: - -```````````````````````````````` example -Foo -------------------------- - -Foo -= -. -

    Foo

    -

    Foo

    -```````````````````````````````` - - -The heading content can be indented up to three spaces, and need -not line up with the underlining: - -```````````````````````````````` example - Foo ---- - - Foo ------ - - Foo - === -. -

    Foo

    -

    Foo

    -

    Foo

    -```````````````````````````````` - - -Four spaces indent is too much: - -```````````````````````````````` example - Foo - --- - - Foo ---- -. -
    Foo
    ----
    -
    -Foo
    -
    -
    -```````````````````````````````` - - -The setext heading underline can be indented up to three spaces, and -may have trailing spaces: - -```````````````````````````````` example -Foo - ---- -. -

    Foo

    -```````````````````````````````` - - -Four spaces is too much: - -```````````````````````````````` example -Foo - --- -. -

    Foo ----

    -```````````````````````````````` - - -The setext heading underline cannot contain internal spaces: - -```````````````````````````````` example -Foo -= = - -Foo ---- - -. -

    Foo -= =

    -

    Foo

    -
    -```````````````````````````````` - - -Trailing spaces in the content line do not cause a line break: - -```````````````````````````````` example -Foo ------ -. -

    Foo

    -```````````````````````````````` - - -Nor does a backslash at the end: - -```````````````````````````````` example -Foo\ ----- -. -

    Foo\

    -```````````````````````````````` - - -Since indicators of block structure take precedence over -indicators of inline structure, the following are setext headings: - -```````````````````````````````` example -`Foo ----- -` - - -. -

    `Foo

    -

    `

    -

    <a title="a lot

    -

    of dashes"/>

    -```````````````````````````````` - - -The setext heading underline cannot be a [lazy continuation -line] in a list item or block quote: - -```````````````````````````````` example -> Foo ---- -. -
    -

    Foo

    -
    -
    -```````````````````````````````` - - -```````````````````````````````` example -> foo -bar -=== -. -
    -

    foo -bar -===

    -
    -```````````````````````````````` - - -```````````````````````````````` example -- Foo ---- -. -
      -
    • Foo
    • -
    -
    -```````````````````````````````` - - -A blank line is needed between a paragraph and a following -setext heading, since otherwise the paragraph becomes part -of the heading's content: - -```````````````````````````````` example -Foo -Bar ---- -. -

    Foo -Bar

    -```````````````````````````````` - - -But in general a blank line is not required before or after -setext headings: - -```````````````````````````````` example ---- -Foo ---- -Bar ---- -Baz -. -
    -

    Foo

    -

    Bar

    -

    Baz

    -```````````````````````````````` - - -Setext headings cannot be empty: - -```````````````````````````````` example - -==== -. -

    ====

    -```````````````````````````````` - - -Setext heading text lines must not be interpretable as block -constructs other than paragraphs. So, the line of dashes -in these examples gets interpreted as a thematic break: - -```````````````````````````````` example ---- ---- -. -
    -
    -```````````````````````````````` - - -```````````````````````````````` example -- foo ------ -. -
      -
    • foo
    • -
    -
    -```````````````````````````````` - - -```````````````````````````````` example - foo ---- -. -
    foo
    -
    -
    -```````````````````````````````` - - -```````````````````````````````` example -> foo ------ -. -
    -

    foo

    -
    -
    -```````````````````````````````` - - -If you want a heading with `> foo` as its literal text, you can -use backslash escapes: - -```````````````````````````````` example -\> foo ------- -. -

    > foo

    -```````````````````````````````` - - -**Compatibility note:** Most existing Markdown implementations -do not allow the text of setext headings to span multiple lines. -But there is no consensus about how to interpret - -``` markdown -Foo -bar ---- -baz -``` - -One can find four different interpretations: - -1. paragraph "Foo", heading "bar", paragraph "baz" -2. paragraph "Foo bar", thematic break, paragraph "baz" -3. paragraph "Foo bar --- baz" -4. heading "Foo bar", paragraph "baz" - -We find interpretation 4 most natural, and interpretation 4 -increases the expressive power of CommonMark, by allowing -multiline headings. Authors who want interpretation 1 can -put a blank line after the first paragraph: - -```````````````````````````````` example -Foo - -bar ---- -baz -. -

    Foo

    -

    bar

    -

    baz

    -```````````````````````````````` - - -Authors who want interpretation 2 can put blank lines around -the thematic break, - -```````````````````````````````` example -Foo -bar - ---- - -baz -. -

    Foo -bar

    -
    -

    baz

    -```````````````````````````````` - - -or use a thematic break that cannot count as a [setext heading -underline], such as - -```````````````````````````````` example -Foo -bar -* * * -baz -. -

    Foo -bar

    -
    -

    baz

    -```````````````````````````````` - - -Authors who want interpretation 3 can use backslash escapes: - -```````````````````````````````` example -Foo -bar -\--- -baz -. -

    Foo -bar ---- -baz

    -```````````````````````````````` - - -## Indented code blocks - -An [indented code block](@) is composed of one or more -[indented chunks] separated by blank lines. -An [indented chunk](@) is a sequence of non-blank lines, -each indented four or more spaces. The contents of the code block are -the literal contents of the lines, including trailing -[line endings], minus four spaces of indentation. -An indented code block has no [info string]. - -An indented code block cannot interrupt a paragraph, so there must be -a blank line between a paragraph and a following indented code block. -(A blank line is not needed, however, between a code block and a following -paragraph.) - -```````````````````````````````` example - a simple - indented code block -. -
    a simple
    -  indented code block
    -
    -```````````````````````````````` - - -If there is any ambiguity between an interpretation of indentation -as a code block and as indicating that material belongs to a [list -item][list items], the list item interpretation takes precedence: - -```````````````````````````````` example - - foo - - bar -. -
      -
    • -

      foo

      -

      bar

      -
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -1. foo - - - bar -. -
      -
    1. -

      foo

      -
        -
      • bar
      • -
      -
    2. -
    -```````````````````````````````` - - - -The contents of a code block are literal text, and do not get parsed -as Markdown: - -```````````````````````````````` example -
    - *hi* - - - one -. -
    <a/>
    -*hi*
    -
    -- one
    -
    -```````````````````````````````` - - -Here we have three chunks separated by blank lines: - -```````````````````````````````` example - chunk1 - - chunk2 - - - - chunk3 -. -
    chunk1
    -
    -chunk2
    -
    -
    -
    -chunk3
    -
    -```````````````````````````````` - - -Any initial spaces beyond four will be included in the content, even -in interior blank lines: - -```````````````````````````````` example - chunk1 - - chunk2 -. -
    chunk1
    -  
    -  chunk2
    -
    -```````````````````````````````` - - -An indented code block cannot interrupt a paragraph. (This -allows hanging indents and the like.) - -```````````````````````````````` example -Foo - bar - -. -

    Foo -bar

    -```````````````````````````````` - - -However, any non-blank line with fewer than four leading spaces ends -the code block immediately. So a paragraph may occur immediately -after indented code: - -```````````````````````````````` example - foo -bar -. -
    foo
    -
    -

    bar

    -```````````````````````````````` - - -And indented code can occur immediately before and after other kinds of -blocks: - -```````````````````````````````` example -# Heading - foo -Heading ------- - foo ----- -. -

    Heading

    -
    foo
    -
    -

    Heading

    -
    foo
    -
    -
    -```````````````````````````````` - - -The first line can be indented more than four spaces: - -```````````````````````````````` example - foo - bar -. -
        foo
    -bar
    -
    -```````````````````````````````` - - -Blank lines preceding or following an indented code block -are not included in it: - -```````````````````````````````` example - - - foo - - -. -
    foo
    -
    -```````````````````````````````` - - -Trailing spaces are included in the code block's content: - -```````````````````````````````` example - foo -. -
    foo  
    -
    -```````````````````````````````` - - - -## Fenced code blocks - -A [code fence](@) is a sequence -of at least three consecutive backtick characters (`` ` ``) or -tildes (`~`). (Tildes and backticks cannot be mixed.) -A [fenced code block](@) -begins with a code fence, indented no more than three spaces. - -The line with the opening code fence may optionally contain some text -following the code fence; this is trimmed of leading and trailing -whitespace and called the [info string](@). If the [info string] comes -after a backtick fence, it may not contain any backtick -characters. (The reason for this restriction is that otherwise -some inline code would be incorrectly interpreted as the -beginning of a fenced code block.) - -The content of the code block consists of all subsequent lines, until -a closing [code fence] of the same type as the code block -began with (backticks or tildes), and with at least as many backticks -or tildes as the opening code fence. If the leading code fence is -indented N spaces, then up to N spaces of indentation are removed from -each line of the content (if present). (If a content line is not -indented, it is preserved unchanged. If it is indented less than N -spaces, all of the indentation is removed.) - -The closing code fence may be indented up to three spaces, and may be -followed only by spaces, which are ignored. If the end of the -containing block (or document) is reached and no closing code fence -has been found, the code block contains all of the lines after the -opening code fence until the end of the containing block (or -document). (An alternative spec would require backtracking in the -event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the -behavior described here.) - -A fenced code block may interrupt a paragraph, and does not require -a blank line either before or after. 
- -The content of a code fence is treated as literal text, not parsed -as inlines. The first word of the [info string] is typically used to -specify the language of the code sample, and rendered in the `class` -attribute of the `code` tag. However, this spec does not mandate any -particular treatment of the [info string]. - -Here is a simple example with backticks: - -```````````````````````````````` example -``` -< - > -``` -. -
    <
    - >
    -
    -```````````````````````````````` - - -With tildes: - -```````````````````````````````` example -~~~ -< - > -~~~ -. -
    <
    - >
    -
    -```````````````````````````````` - -Fewer than three backticks is not enough: - -```````````````````````````````` example -`` -foo -`` -. -

    foo

    -```````````````````````````````` - -The closing code fence must use the same character as the opening -fence: - -```````````````````````````````` example -``` -aaa -~~~ -``` -. -
    aaa
    -~~~
    -
    -```````````````````````````````` - - -```````````````````````````````` example -~~~ -aaa -``` -~~~ -. -
    aaa
    -```
    -
    -```````````````````````````````` - - -The closing code fence must be at least as long as the opening fence: - -```````````````````````````````` example -```` -aaa -``` -`````` -. -
    aaa
    -```
    -
    -```````````````````````````````` - - -```````````````````````````````` example -~~~~ -aaa -~~~ -~~~~ -. -
    aaa
    -~~~
    -
    -```````````````````````````````` - - -Unclosed code blocks are closed by the end of the document -(or the enclosing [block quote][block quotes] or [list item][list items]): - -```````````````````````````````` example -``` -. -
    -```````````````````````````````` - - -```````````````````````````````` example -````` - -``` -aaa -. -
    
    -```
    -aaa
    -
    -```````````````````````````````` - - -```````````````````````````````` example -> ``` -> aaa - -bbb -. -
    -
    aaa
    -
    -
    -

    bbb

    -```````````````````````````````` - - -A code block can have all empty lines as its content: - -```````````````````````````````` example -``` - - -``` -. -
    
    -  
    -
    -```````````````````````````````` - - -A code block can be empty: - -```````````````````````````````` example -``` -``` -. -
    -```````````````````````````````` - - -Fences can be indented. If the opening fence is indented, -content lines will have equivalent opening indentation removed, -if present: - -```````````````````````````````` example - ``` - aaa -aaa -``` -. -
    aaa
    -aaa
    -
    -```````````````````````````````` - - -```````````````````````````````` example - ``` -aaa - aaa -aaa - ``` -. -
    aaa
    -aaa
    -aaa
    -
    -```````````````````````````````` - - -```````````````````````````````` example - ``` - aaa - aaa - aaa - ``` -. -
    aaa
    - aaa
    -aaa
    -
    -```````````````````````````````` - - -Four spaces indentation produces an indented code block: - -```````````````````````````````` example - ``` - aaa - ``` -. -
    ```
    -aaa
    -```
    -
    -```````````````````````````````` - - -Closing fences may be indented by 0-3 spaces, and their indentation -need not match that of the opening fence: - -```````````````````````````````` example -``` -aaa - ``` -. -
    aaa
    -
    -```````````````````````````````` - - -```````````````````````````````` example - ``` -aaa - ``` -. -
    aaa
    -
    -```````````````````````````````` - - -This is not a closing fence, because it is indented 4 spaces: - -```````````````````````````````` example -``` -aaa - ``` -. -
    aaa
    -    ```
    -
    -```````````````````````````````` - - - -Code fences (opening and closing) cannot contain internal spaces: - -```````````````````````````````` example -``` ``` -aaa -. -

    -aaa

    -```````````````````````````````` - - -```````````````````````````````` example -~~~~~~ -aaa -~~~ ~~ -. -
    aaa
    -~~~ ~~
    -
    -```````````````````````````````` - - -Fenced code blocks can interrupt paragraphs, and can be followed -directly by paragraphs, without a blank line between: - -```````````````````````````````` example -foo -``` -bar -``` -baz -. -

    foo

    -
    bar
    -
    -

    baz

    -```````````````````````````````` - - -Other blocks can also occur before and after fenced code blocks -without an intervening blank line: - -```````````````````````````````` example -foo ---- -~~~ -bar -~~~ -# baz -. -

    foo

    -
    bar
    -
    -

    baz

    -```````````````````````````````` - - -An [info string] can be provided after the opening code fence. -Although this spec doesn't mandate any particular treatment of -the info string, the first word is typically used to specify -the language of the code block. In HTML output, the language is -normally indicated by adding a class to the `code` element consisting -of `language-` followed by the language name. - -```````````````````````````````` example -```ruby -def foo(x) - return 3 -end -``` -. -
    def foo(x)
    -  return 3
    -end
    -
    -```````````````````````````````` - - -```````````````````````````````` example -~~~~ ruby startline=3 $%@#$ -def foo(x) - return 3 -end -~~~~~~~ -. -
    def foo(x)
    -  return 3
    -end
    -
    -```````````````````````````````` - - -```````````````````````````````` example -````; -```` -. -
    -```````````````````````````````` - - -[Info strings] for backtick code blocks cannot contain backticks: - -```````````````````````````````` example -``` aa ``` -foo -. -

    aa -foo

    -```````````````````````````````` - - -[Info strings] for tilde code blocks can contain backticks and tildes: - -```````````````````````````````` example -~~~ aa ``` ~~~ -foo -~~~ -. -
    foo
    -
    -```````````````````````````````` - - -Closing code fences cannot have [info strings]: - -```````````````````````````````` example -``` -``` aaa -``` -. -
    ``` aaa
    -
    -```````````````````````````````` - - - -## HTML blocks - -An [HTML block](@) is a group of lines that is treated -as raw HTML (and will not be escaped in HTML output). - -There are seven kinds of [HTML block], which can be defined by their -start and end conditions. The block begins with a line that meets a -[start condition](@) (after up to three spaces optional indentation). -It ends with the first subsequent line that meets a matching [end -condition](@), or the last line of the document, or the last line of -the [container block](#container-blocks) containing the current HTML -block, if no line is encountered that meets the [end condition]. If -the first line meets both the [start condition] and the [end -condition], the block will contain just that line. - -1. **Start condition:** line begins with the string ``, or the end of the line.\ -**End condition:** line contains an end tag -``, ``, or `` (case-insensitive; it -need not match the start tag). - -2. **Start condition:** line begins with the string ``. - -3. **Start condition:** line begins with the string ``. - -4. **Start condition:** line begins with the string ``. - -5. **Start condition:** line begins with the string -``. - -6. **Start condition:** line begins the string `<` or ``, or -the string `/>`.\ -**End condition:** line is followed by a [blank line]. - -7. **Start condition:** line begins with a complete [open tag] -(with any [tag name] other than `script`, -`style`, or `pre`) or a complete [closing tag], -followed only by [whitespace] or the end of the line.\ -**End condition:** line is followed by a [blank line]. - -HTML blocks continue until they are closed by their appropriate -[end condition], or the last line of the document or other [container -block](#container-blocks). This means any HTML **within an HTML -block** that might otherwise be recognised as a start condition will -be ignored by the parser and passed through as-is, without changing -the parser's state. - -For instance, `
    <pre>` within a HTML block started by `<table>` will not affect
    -the parser state; as the HTML block was started in by start condition 6, it
    -will end at any blank line. This can be surprising:
    -
    -```````````````````````````````` example
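    -<table><tr><td>
    -<pre>
    -**Hello**,
    -
    -_world_.
    -</pre>
    -</td></tr></table>
    -.
    -<table><tr><td>
    -<pre>
    -**Hello**,
    -<p><em>world</em>.
    -</pre></p>
    -</td></tr></table>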
    -```````````````````````````````` - -In this case, the HTML block is terminated by the newline — the `**Hello**` -text remains verbatim — and regular parsing resumes, with a paragraph, -emphasised `world` and inline and block HTML following. - -All types of [HTML blocks] except type 7 may interrupt -a paragraph. Blocks of type 7 may not interrupt a paragraph. -(This restriction is intended to prevent unwanted interpretation -of long tags inside a wrapped paragraph as starting HTML blocks.) - -Some simple examples follow. Here are some basic HTML blocks -of type 6: - -```````````````````````````````` example - - - - -
    - hi -
    - -okay. -. - - - - -
    - hi -
    -

    okay.

    -```````````````````````````````` - - -```````````````````````````````` example -
    -*foo* -```````````````````````````````` - - -Here we have two HTML blocks with a Markdown paragraph between them: - -```````````````````````````````` example -
    - -*Markdown* - -
    -. -
    -

    Markdown

    -
    -```````````````````````````````` - - -The tag on the first line can be partial, as long -as it is split where there would be whitespace: - -```````````````````````````````` example -
    -
    -. -
    -
    -```````````````````````````````` - - -```````````````````````````````` example -
    -
    -. -
    -
    -```````````````````````````````` - - -An open tag need not be closed: -```````````````````````````````` example -
    -*foo* - -*bar* -. -
    -*foo* -

    bar

    -```````````````````````````````` - - - -A partial tag need not even be completed (garbage -in, garbage out): - -```````````````````````````````` example -
    -. - -```````````````````````````````` - - -```````````````````````````````` example -
    -foo -
    -. -
    -foo -
    -```````````````````````````````` - - -Everything until the next blank line or end of document -gets included in the HTML block. So, in the following -example, what looks like a Markdown code block -is actually part of the HTML block, which continues until a blank -line or the end of the document is reached: - -```````````````````````````````` example -
    -``` c -int x = 33; -``` -. -
    -``` c -int x = 33; -``` -```````````````````````````````` - - -To start an [HTML block] with a tag that is *not* in the -list of block-level tags in (6), you must put the tag by -itself on the first line (and it must be complete): - -```````````````````````````````` example - -*bar* - -. - -*bar* - -```````````````````````````````` - - -In type 7 blocks, the [tag name] can be anything: - -```````````````````````````````` example - -*bar* - -. - -*bar* - -```````````````````````````````` - - -```````````````````````````````` example - -*bar* - -. - -*bar* - -```````````````````````````````` - - -```````````````````````````````` example - -*bar* -. - -*bar* -```````````````````````````````` - - -These rules are designed to allow us to work with tags that -can function as either block-level or inline-level tags. -The `` tag is a nice example. We can surround content with -`` tags in three different ways. In this case, we get a raw -HTML block, because the `` tag is on a line by itself: - -```````````````````````````````` example - -*foo* - -. - -*foo* - -```````````````````````````````` - - -In this case, we get a raw HTML block that just includes -the `` tag (because it ends with the following blank -line). So the contents get interpreted as CommonMark: - -```````````````````````````````` example - - -*foo* - - -. - -

    foo

    -
    -```````````````````````````````` - - -Finally, in this case, the `` tags are interpreted -as [raw HTML] *inside* the CommonMark paragraph. (Because -the tag is not on a line by itself, we get inline HTML -rather than an [HTML block].) - -```````````````````````````````` example -*foo* -. -

    foo

    -```````````````````````````````` - - -HTML tags designed to contain literal content -(`script`, `style`, `pre`), comments, processing instructions, -and declarations are treated somewhat differently. -Instead of ending at the first blank line, these blocks -end at the first line containing a corresponding end tag. -As a result, these blocks can contain blank lines: - -A pre tag (type 1): - -```````````````````````````````` example -
    
    -import Text.HTML.TagSoup
    -
    -main :: IO ()
    -main = print $ parseTags tags
    -
    -okay -. -
    
    -import Text.HTML.TagSoup
    -
    -main :: IO ()
    -main = print $ parseTags tags
    -
    -

    okay

    -```````````````````````````````` - - -A script tag (type 1): - -```````````````````````````````` example - -okay -. - -

    okay

    -```````````````````````````````` - - -A style tag (type 1): - -```````````````````````````````` example - -okay -. - -

    okay

    -```````````````````````````````` - - -If there is no matching end tag, the block will end at the -end of the document (or the enclosing [block quote][block quotes] -or [list item][list items]): - -```````````````````````````````` example - -*foo* -. - -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -*bar* -*baz* -. -*bar* -

    baz

    -```````````````````````````````` - - -Note that anything on the last line after the -end tag will be included in the [HTML block]: - -```````````````````````````````` example -1. *bar* -. -1. *bar* -```````````````````````````````` - - -A comment (type 2): - -```````````````````````````````` example - -okay -. - -

    okay

    -```````````````````````````````` - - - -A processing instruction (type 3): - -```````````````````````````````` example -'; - -?> -okay -. -'; - -?> -

    okay

    -```````````````````````````````` - - -A declaration (type 4): - -```````````````````````````````` example - -. - -```````````````````````````````` - - -CDATA (type 5): - -```````````````````````````````` example - -okay -. - -

    okay

    -```````````````````````````````` - - -The opening tag can be indented 1-3 spaces, but not 4: - -```````````````````````````````` example - - - -. - -
    <!-- foo -->
    -
    -```````````````````````````````` - - -```````````````````````````````` example -
    - -
    -. -
    -
    <div>
    -
    -```````````````````````````````` - - -An HTML block of types 1--6 can interrupt a paragraph, and need not be -preceded by a blank line. - -```````````````````````````````` example -Foo -
    -bar -
    -. -

    Foo

    -
    -bar -
    -```````````````````````````````` - - -However, a following blank line is needed, except at the end of -a document, and except for blocks of types 1--5, [above][HTML -block]: - -```````````````````````````````` example -
    -bar -
    -*foo* -. -
    -bar -
    -*foo* -```````````````````````````````` - - -HTML blocks of type 7 cannot interrupt a paragraph: - -```````````````````````````````` example -Foo - -baz -. -

    Foo - -baz

    -```````````````````````````````` - - -This rule differs from John Gruber's original Markdown syntax -specification, which says: - -> The only restrictions are that block-level HTML elements — -> e.g. `
    <div>`, `<table>`, `
    <pre>`, `
    <p>
    `, etc. — must be separated from -> surrounding content by blank lines, and the start and end tags of the -> block should not be indented with tabs or spaces. - -In some ways Gruber's rule is more restrictive than the one given -here: - -- It requires that an HTML block be preceded by a blank line. -- It does not allow the start tag to be indented. -- It requires a matching end tag, which it also does not allow to - be indented. - -Most Markdown implementations (including some of Gruber's own) do not -respect all of these restrictions. - -There is one respect, however, in which Gruber's rule is more liberal -than the one given here, since it allows blank lines to occur inside -an HTML block. There are two reasons for disallowing them here. -First, it removes the need to parse balanced tags, which is -expensive and can require backtracking from the end of the document -if no matching end tag is found. Second, it provides a very simple -and flexible way of including Markdown content inside HTML tags: -simply separate the Markdown from the HTML using blank lines: - -Compare: - -```````````````````````````````` example -

    - -*Emphasized* text. - -
    -. -
    -

    Emphasized text.

    -
    -```````````````````````````````` - - -```````````````````````````````` example -
    -*Emphasized* text. -
    -. -
    -*Emphasized* text. -
    -```````````````````````````````` - - -Some Markdown implementations have adopted a convention of -interpreting content inside tags as text if the open tag has -the attribute `markdown=1`. The rule given above seems a simpler and -more elegant way of achieving the same expressive power, which is also -much simpler to parse. - -The main potential drawback is that one can no longer paste HTML -blocks into Markdown documents with 100% reliability. However, -*in most cases* this will work fine, because the blank lines in -HTML are usually followed by HTML block tags. For example: - -```````````````````````````````` example -
    - - - - - - - -
    -Hi -
    -. - - - - -
    -Hi -
    -```````````````````````````````` - - -There are problems, however, if the inner tags are indented -*and* separated by spaces, as then they will be interpreted as -an indented code block: - -```````````````````````````````` example - - - - - - - - -
    - Hi -
    -. - - -
    <td>
    -  Hi
    -</td>
    -
    - -
    -```````````````````````````````` - - -Fortunately, blank lines are usually not necessary and can be -deleted. The exception is inside `
    <pre>` tags, but as described
    -[above][HTML blocks], raw HTML blocks starting with `
    <pre>`
    -*can* contain blank lines.
    -
    -## Link reference definitions
    -
    -A [link reference definition](@)
    -consists of a [link label], indented up to three spaces, followed
    -by a colon (`:`), optional [whitespace] (including up to one
    -[line ending]), a [link destination],
    -optional [whitespace] (including up to one
    -[line ending]), and an optional [link
    -title], which if it is present must be separated
    -from the [link destination] by [whitespace].
    -No further [non-whitespace characters] may occur on the line.
    -
    -A [link reference definition]
    -does not correspond to a structural element of a document.  Instead, it
    -defines a label which can be used in [reference links]
    -and reference-style [images] elsewhere in the document.  [Link
    -reference definitions] can come either before or after the links that use
    -them.
    -
    -```````````````````````````````` example
    -[foo]: /url "title"
    -
    -[foo]
    -.
    -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example - [foo]: - /url - 'the title' - -[foo] -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -[Foo*bar\]]:my_(url) 'title (with parens)' - -[Foo*bar\]] -. -

    Foo*bar]

    -```````````````````````````````` - - -```````````````````````````````` example -[Foo bar]: - -'title' - -[Foo bar] -. -

    Foo bar

    -```````````````````````````````` - - -The title may extend over multiple lines: - -```````````````````````````````` example -[foo]: /url ' -title -line1 -line2 -' - -[foo] -. -

    foo

    -```````````````````````````````` - - -However, it may not contain a [blank line]: - -```````````````````````````````` example -[foo]: /url 'title - -with blank line' - -[foo] -. -

    [foo]: /url 'title

    -

    with blank line'

    -

    [foo]

    -```````````````````````````````` - - -The title may be omitted: - -```````````````````````````````` example -[foo]: -/url - -[foo] -. -

    foo

    -```````````````````````````````` - - -The link destination may not be omitted: - -```````````````````````````````` example -[foo]: - -[foo] -. -

    [foo]:

    -

    [foo]

    -```````````````````````````````` - - However, an empty link destination may be specified using - angle brackets: - -```````````````````````````````` example -[foo]: <> - -[foo] -. -

    foo

    -```````````````````````````````` - -The title must be separated from the link destination by -whitespace: - -```````````````````````````````` example -[foo]: (baz) - -[foo] -. -

    [foo]: (baz)

    -

    [foo]

    -```````````````````````````````` - - -Both title and destination can contain backslash escapes -and literal backslashes: - -```````````````````````````````` example -[foo]: /url\bar\*baz "foo\"bar\baz" - -[foo] -. -

    foo

    -```````````````````````````````` - - -A link can come before its corresponding definition: - -```````````````````````````````` example -[foo] - -[foo]: url -. -

    foo

    -```````````````````````````````` - - -If there are several matching definitions, the first one takes -precedence: - -```````````````````````````````` example -[foo] - -[foo]: first -[foo]: second -. -

    foo

    -```````````````````````````````` - - -As noted in the section on [Links], matching of labels is -case-insensitive (see [matches]). - -```````````````````````````````` example -[FOO]: /url - -[Foo] -. -

    Foo

    -```````````````````````````````` - - -```````````````````````````````` example -[ΑΓΩ]: /φου - -[αγω] -. -

    αγω

    -```````````````````````````````` - - -Here is a link reference definition with no corresponding link. -It contributes nothing to the document. - -```````````````````````````````` example -[foo]: /url -. -```````````````````````````````` - - -Here is another one: - -```````````````````````````````` example -[ -foo -]: /url -bar -. -

    bar

    -```````````````````````````````` - - -This is not a link reference definition, because there are -[non-whitespace characters] after the title: - -```````````````````````````````` example -[foo]: /url "title" ok -. -

    [foo]: /url "title" ok

    -```````````````````````````````` - - -This is a link reference definition, but it has no title: - -```````````````````````````````` example -[foo]: /url -"title" ok -. -

    "title" ok

    -```````````````````````````````` - - -This is not a link reference definition, because it is indented -four spaces: - -```````````````````````````````` example - [foo]: /url "title" - -[foo] -. -
    [foo]: /url "title"
    -
    -

    [foo]

    -```````````````````````````````` - - -This is not a link reference definition, because it occurs inside -a code block: - -```````````````````````````````` example -``` -[foo]: /url -``` - -[foo] -. -
    [foo]: /url
    -
    -

    [foo]

    -```````````````````````````````` - - -A [link reference definition] cannot interrupt a paragraph. - -```````````````````````````````` example -Foo -[bar]: /baz - -[bar] -. -

    Foo -[bar]: /baz

    -

    [bar]

    -```````````````````````````````` - - -However, it can directly follow other block elements, such as headings -and thematic breaks, and it need not be followed by a blank line. - -```````````````````````````````` example -# [Foo] -[foo]: /url -> bar -. -

    Foo

    -
    -

    bar

    -
    -```````````````````````````````` - -```````````````````````````````` example -[foo]: /url -bar -=== -[foo] -. -

    bar

    -

    foo

    -```````````````````````````````` - -```````````````````````````````` example -[foo]: /url -=== -[foo] -. -

    === -foo

    -```````````````````````````````` - - -Several [link reference definitions] -can occur one after another, without intervening blank lines. - -```````````````````````````````` example -[foo]: /foo-url "foo" -[bar]: /bar-url - "bar" -[baz]: /baz-url - -[foo], -[bar], -[baz] -. -

    foo, -bar, -baz

    -```````````````````````````````` - - -[Link reference definitions] can occur -inside block containers, like lists and block quotations. They -affect the entire document, not just the container in which they -are defined: - -```````````````````````````````` example -[foo] - -> [foo]: /url -. -

    foo

    -
    -
    -```````````````````````````````` - - -Whether something is a [link reference definition] is -independent of whether the link reference it defines is -used in the document. Thus, for example, the following -document contains just a link reference definition, and -no visible content: - -```````````````````````````````` example -[foo]: /url -. -```````````````````````````````` - - -## Paragraphs - -A sequence of non-blank lines that cannot be interpreted as other -kinds of blocks forms a [paragraph](@). -The contents of the paragraph are the result of parsing the -paragraph's raw content as inlines. The paragraph's raw content -is formed by concatenating the lines and removing initial and final -[whitespace]. - -A simple example with two paragraphs: - -```````````````````````````````` example -aaa - -bbb -. -

    aaa

    -

    bbb

    -```````````````````````````````` - - -Paragraphs can contain multiple lines, but no blank lines: - -```````````````````````````````` example -aaa -bbb - -ccc -ddd -. -

    aaa -bbb

    -

    ccc -ddd

    -```````````````````````````````` - - -Multiple blank lines between paragraph have no effect: - -```````````````````````````````` example -aaa - - -bbb -. -

    aaa

    -

    bbb

    -```````````````````````````````` - - -Leading spaces are skipped: - -```````````````````````````````` example - aaa - bbb -. -

    aaa -bbb

    -```````````````````````````````` - - -Lines after the first may be indented any amount, since indented -code blocks cannot interrupt paragraphs. - -```````````````````````````````` example -aaa - bbb - ccc -. -

    aaa -bbb -ccc

    -```````````````````````````````` - - -However, the first line may be indented at most three spaces, -or an indented code block will be triggered: - -```````````````````````````````` example - aaa -bbb -. -

    aaa -bbb

    -```````````````````````````````` - - -```````````````````````````````` example - aaa -bbb -. -
    aaa
    -
    -

    bbb

    -```````````````````````````````` - - -Final spaces are stripped before inline parsing, so a paragraph -that ends with two or more spaces will not end with a [hard line -break]: - -```````````````````````````````` example -aaa -bbb -. -

    aaa
    -bbb

    -```````````````````````````````` - - -## Blank lines - -[Blank lines] between block-level elements are ignored, -except for the role they play in determining whether a [list] -is [tight] or [loose]. - -Blank lines at the beginning and end of the document are also ignored. - -```````````````````````````````` example - - -aaa - - -# aaa - - -. -

    aaa

    -

    aaa

    -```````````````````````````````` - - - -# Container blocks - -A [container block](#container-blocks) is a block that has other -blocks as its contents. There are two basic kinds of container blocks: -[block quotes] and [list items]. -[Lists] are meta-containers for [list items]. - -We define the syntax for container blocks recursively. The general -form of the definition is: - -> If X is a sequence of blocks, then the result of -> transforming X in such-and-such a way is a container of type Y -> with these blocks as its content. - -So, we explain what counts as a block quote or list item by explaining -how these can be *generated* from their contents. This should suffice -to define the syntax, although it does not give a recipe for *parsing* -these constructions. (A recipe is provided below in the section entitled -[A parsing strategy](#appendix-a-parsing-strategy).) - -## Block quotes - -A [block quote marker](@) -consists of 0-3 spaces of initial indent, plus (a) the character `>` together -with a following space, or (b) a single character `>` not followed by a space. - -The following rules define [block quotes]: - -1. **Basic case.** If a string of lines *Ls* constitute a sequence - of blocks *Bs*, then the result of prepending a [block quote - marker] to the beginning of each line in *Ls* - is a [block quote](#block-quotes) containing *Bs*. - -2. **Laziness.** If a string of lines *Ls* constitute a [block - quote](#block-quotes) with contents *Bs*, then the result of deleting - the initial [block quote marker] from one or - more lines in which the next [non-whitespace character] after the [block - quote marker] is [paragraph continuation - text] is a block quote with *Bs* as its content. - [Paragraph continuation text](@) is text - that will be parsed as part of the content of a paragraph, but does - not occur at the beginning of the paragraph. - -3. 
**Consecutiveness.** A document cannot contain two [block - quotes] in a row unless there is a [blank line] between them. - -Nothing else counts as a [block quote](#block-quotes). - -Here is a simple example: - -```````````````````````````````` example -> # Foo -> bar -> baz -. -
    -

    Foo

    -

    bar -baz

    -
    -```````````````````````````````` - - -The spaces after the `>` characters can be omitted: - -```````````````````````````````` example -># Foo ->bar -> baz -. -
    -

    Foo

    -

    bar -baz

    -
    -```````````````````````````````` - - -The `>` characters can be indented 1-3 spaces: - -```````````````````````````````` example - > # Foo - > bar - > baz -. -
    -

    Foo

    -

    bar -baz

    -
    -```````````````````````````````` - - -Four spaces gives us a code block: - -```````````````````````````````` example - > # Foo - > bar - > baz -. -
    > # Foo
    -> bar
    -> baz
    -
    -```````````````````````````````` - - -The Laziness clause allows us to omit the `>` before -[paragraph continuation text]: - -```````````````````````````````` example -> # Foo -> bar -baz -. -
    -

    Foo

    -

    bar -baz

    -
    -```````````````````````````````` - - -A block quote can contain some lazy and some non-lazy -continuation lines: - -```````````````````````````````` example -> bar -baz -> foo -. -
    -

    bar -baz -foo

    -
    -```````````````````````````````` - - -Laziness only applies to lines that would have been continuations of -paragraphs had they been prepended with [block quote markers]. -For example, the `> ` cannot be omitted in the second line of - -``` markdown -> foo -> --- -``` - -without changing the meaning: - -```````````````````````````````` example -> foo ---- -. -
    -

    foo

    -
    -
    -```````````````````````````````` - - -Similarly, if we omit the `> ` in the second line of - -``` markdown -> - foo -> - bar -``` - -then the block quote ends after the first line: - -```````````````````````````````` example -> - foo -- bar -. -
    -
      -
    • foo
    • -
    -
    -
      -
    • bar
    • -
    -```````````````````````````````` - - -For the same reason, we can't omit the `> ` in front of -subsequent lines of an indented or fenced code block: - -```````````````````````````````` example -> foo - bar -. -
    -
    foo
    -
    -
    -
    bar
    -
    -```````````````````````````````` - - -```````````````````````````````` example -> ``` -foo -``` -. -
    -
    -
    -

    foo

    -
    -```````````````````````````````` - - -Note that in the following case, we have a [lazy -continuation line]: - -```````````````````````````````` example -> foo - - bar -. -
    -

    foo -- bar

    -
    -```````````````````````````````` - - -To see why, note that in - -```markdown -> foo -> - bar -``` - -the `- bar` is indented too far to start a list, and can't -be an indented code block because indented code blocks cannot -interrupt paragraphs, so it is [paragraph continuation text]. - -A block quote can be empty: - -```````````````````````````````` example -> -. -
    -
    -```````````````````````````````` - - -```````````````````````````````` example -> -> -> -. -
    -
    -```````````````````````````````` - - -A block quote can have initial or final blank lines: - -```````````````````````````````` example -> -> foo -> -. -
    -

    foo

    -
    -```````````````````````````````` - - -A blank line always separates block quotes: - -```````````````````````````````` example -> foo - -> bar -. -
    -

    foo

    -
    -
    -

    bar

    -
    -```````````````````````````````` - - -(Most current Markdown implementations, including John Gruber's -original `Markdown.pl`, will parse this example as a single block quote -with two paragraphs. But it seems better to allow the author to decide -whether two block quotes or one are wanted.) - -Consecutiveness means that if we put these block quotes together, -we get a single block quote: - -```````````````````````````````` example -> foo -> bar -. -
    -

    foo -bar

    -
    -```````````````````````````````` - - -To get a block quote with two paragraphs, use: - -```````````````````````````````` example -> foo -> -> bar -. -
    -

    foo

    -

    bar

    -
    -```````````````````````````````` - - -Block quotes can interrupt paragraphs: - -```````````````````````````````` example -foo -> bar -. -

    foo

    -
    -

    bar

    -
    -```````````````````````````````` - - -In general, blank lines are not needed before or after block -quotes: - -```````````````````````````````` example -> aaa -*** -> bbb -. -
    -

    aaa

    -
    -
    -
    -

    bbb

    -
    -```````````````````````````````` - - -However, because of laziness, a blank line is needed between -a block quote and a following paragraph: - -```````````````````````````````` example -> bar -baz -. -
    -

    bar -baz

    -
    -```````````````````````````````` - - -```````````````````````````````` example -> bar - -baz -. -
    -

    bar

    -
    -

    baz

    -```````````````````````````````` - - -```````````````````````````````` example -> bar -> -baz -. -
    -

    bar

    -
    -

    baz

    -```````````````````````````````` - - -It is a consequence of the Laziness rule that any number -of initial `>`s may be omitted on a continuation line of a -nested block quote: - -```````````````````````````````` example -> > > foo -bar -. -
    -
    -
    -

    foo -bar

    -
    -
    -
    -```````````````````````````````` - - -```````````````````````````````` example ->>> foo -> bar ->>baz -. -
    -
    -
    -

    foo -bar -baz

    -
    -
    -
    -```````````````````````````````` - - -When including an indented code block in a block quote, -remember that the [block quote marker] includes -both the `>` and a following space. So *five spaces* are needed after -the `>`: - -```````````````````````````````` example -> code - -> not code -. -
    -
    code
    -
    -
    -
    -

    not code

    -
    -```````````````````````````````` - - - -## List items - -A [list marker](@) is a -[bullet list marker] or an [ordered list marker]. - -A [bullet list marker](@) -is a `-`, `+`, or `*` character. - -An [ordered list marker](@) -is a sequence of 1--9 arabic digits (`0-9`), followed by either a -`.` character or a `)` character. (The reason for the length -limit is that with 10 digits we start seeing integer overflows -in some browsers.) - -The following rules define [list items]: - -1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of - blocks *Bs* starting with a [non-whitespace character], and *M* is a - list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result - of prepending *M* and the following spaces to the first line of - *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a - list item with *Bs* as its contents. The type of the list item - (bullet or ordered) is determined by the type of its list marker. - If the list item is ordered, then it is also assigned a start - number, based on the ordered list marker. - - Exceptions: - - 1. When the first list item in a [list] interrupts - a paragraph---that is, when it starts on a line that would - otherwise count as [paragraph continuation text]---then (a) - the lines *Ls* must not begin with a blank line, and (b) if - the list item is ordered, the start number must be 1. - 2. If any line is a [thematic break][thematic breaks] then - that line is not a list item. - -For example, let *Ls* be the lines - -```````````````````````````````` example -A paragraph -with two lines. - - indented code - -> A block quote. -. -

    A paragraph -with two lines.

    -
    indented code
    -
    -
    -

    A block quote.

    -
    -```````````````````````````````` - - -And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says -that the following is an ordered list item with start number 1, -and the same contents as *Ls*: - -```````````````````````````````` example -1. A paragraph - with two lines. - - indented code - - > A block quote. -. -
      -
    1. -

      A paragraph -with two lines.

      -
      indented code
      -
      -
      -

      A block quote.

      -
      -
    2. -
    -```````````````````````````````` - - -The most important thing to notice is that the position of -the text after the list marker determines how much indentation -is needed in subsequent blocks in the list item. If the list -marker takes up two spaces, and there are three spaces between -the list marker and the next [non-whitespace character], then blocks -must be indented five spaces in order to fall under the list -item. - -Here are some examples showing how far content must be indented to be -put under the list item: - -```````````````````````````````` example -- one - - two -. -
      -
    • one
    • -
    -

    two

    -```````````````````````````````` - - -```````````````````````````````` example -- one - - two -. -
      -
    • -

      one

      -

      two

      -
    • -
    -```````````````````````````````` - - -```````````````````````````````` example - - one - - two -. -
      -
    • one
    • -
    -
     two
    -
    -```````````````````````````````` - - -```````````````````````````````` example - - one - - two -. -
      -
    • -

      one

      -

      two

      -
    • -
    -```````````````````````````````` - - -It is tempting to think of this in terms of columns: the continuation -blocks must be indented at least to the column of the first -[non-whitespace character] after the list marker. However, that is not quite right. -The spaces after the list marker determine how much relative indentation -is needed. Which column this indentation reaches will depend on -how the list item is embedded in other constructions, as shown by -this example: - -```````````````````````````````` example - > > 1. one ->> ->> two -. -
    -
    -
      -
    1. -

      one

      -

      two

      -
    2. -
    -
    -
    -```````````````````````````````` - - -Here `two` occurs in the same column as the list marker `1.`, -but is actually contained in the list item, because there is -sufficient indentation after the last containing blockquote marker. - -The converse is also possible. In the following example, the word `two` -occurs far to the right of the initial text of the list item, `one`, but -it is not considered part of the list item, because it is not indented -far enough past the blockquote marker: - -```````````````````````````````` example ->>- one ->> - > > two -. -
    -
    -
      -
    • one
    • -
    -

    two

    -
    -
    -```````````````````````````````` - - -Note that at least one space is needed between the list marker and -any following content, so these are not list items: - -```````````````````````````````` example --one - -2.two -. -

    -one

    -

    2.two

    -```````````````````````````````` - - -A list item may contain blocks that are separated by more than -one blank line. - -```````````````````````````````` example -- foo - - - bar -. -
      -
    • -

      foo

      -

      bar

      -
    • -
    -```````````````````````````````` - - -A list item may contain any kind of block: - -```````````````````````````````` example -1. foo - - ``` - bar - ``` - - baz - - > bam -. -
      -
    1. -

      foo

      -
      bar
      -
      -

      baz

      -
      -

      bam

      -
      -
    2. -
    -```````````````````````````````` - - -A list item that contains an indented code block will preserve -empty lines within the code block verbatim. - -```````````````````````````````` example -- Foo - - bar - - - baz -. -
      -
    • -

      Foo

      -
      bar
      -
      -
      -baz
      -
      -
    • -
    -```````````````````````````````` - -Note that ordered list start numbers must be nine digits or less: - -```````````````````````````````` example -123456789. ok -. -
      -
    1. ok
    2. -
    -```````````````````````````````` - - -```````````````````````````````` example -1234567890. not ok -. -

    1234567890. not ok

    -```````````````````````````````` - - -A start number may begin with 0s: - -```````````````````````````````` example -0. ok -. -
      -
    1. ok
    2. -
    -```````````````````````````````` - - -```````````````````````````````` example -003. ok -. -
      -
    1. ok
    2. -
    -```````````````````````````````` - - -A start number may not be negative: - -```````````````````````````````` example --1. not ok -. -

    -1. not ok

    -```````````````````````````````` - - - -2. **Item starting with indented code.** If a sequence of lines *Ls* - constitute a sequence of blocks *Bs* starting with an indented code - block, and *M* is a list marker of width *W* followed by - one space, then the result of prepending *M* and the following - space to the first line of *Ls*, and indenting subsequent lines of - *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. - If a line is empty, then it need not be indented. The type of the - list item (bullet or ordered) is determined by the type of its list - marker. If the list item is ordered, then it is also assigned a - start number, based on the ordered list marker. - -An indented code block will have to be indented four spaces beyond -the edge of the region where text will be included in the list item. -In the following case that is 6 spaces: - -```````````````````````````````` example -- foo - - bar -. -
      -
    • -

      foo

      -
      bar
      -
      -
    • -
    -```````````````````````````````` - - -And in this case it is 11 spaces: - -```````````````````````````````` example - 10. foo - - bar -. -
      -
    1. -

      foo

      -
      bar
      -
      -
    2. -
    -```````````````````````````````` - - -If the *first* block in the list item is an indented code block, -then by rule #2, the contents must be indented *one* space after the -list marker: - -```````````````````````````````` example - indented code - -paragraph - - more code -. -
    indented code
    -
    -

    paragraph

    -
    more code
    -
    -```````````````````````````````` - - -```````````````````````````````` example -1. indented code - - paragraph - - more code -. -
      -
    1. -
      indented code
      -
      -

      paragraph

      -
      more code
      -
      -
    2. -
    -```````````````````````````````` - - -Note that an additional space indent is interpreted as space -inside the code block: - -```````````````````````````````` example -1. indented code - - paragraph - - more code -. -
      -
    1. -
       indented code
      -
      -

      paragraph

      -
      more code
      -
      -
    2. -
    -```````````````````````````````` - - -Note that rules #1 and #2 only apply to two cases: (a) cases -in which the lines to be included in a list item begin with a -[non-whitespace character], and (b) cases in which -they begin with an indented code -block. In a case like the following, where the first block begins with -a three-space indent, the rules do not allow us to form a list item by -indenting the whole thing and prepending a list marker: - -```````````````````````````````` example - foo - -bar -. -

    foo

    -

    bar

    -```````````````````````````````` - - -```````````````````````````````` example -- foo - - bar -. -
      -
    • foo
    • -
    -

    bar

    -```````````````````````````````` - - -This is not a significant restriction, because when a block begins -with 1-3 spaces indent, the indentation can always be removed without -a change in interpretation, allowing rule #1 to be applied. So, in -the above case: - -```````````````````````````````` example -- foo - - bar -. -
      -
    • -

      foo

      -

      bar

      -
    • -
    -```````````````````````````````` - - -3. **Item starting with a blank line.** If a sequence of lines *Ls* - starting with a single [blank line] constitute a (possibly empty) - sequence of blocks *Bs*, not separated from each other by more than - one blank line, and *M* is a list marker of width *W*, - then the result of prepending *M* to the first line of *Ls*, and - indenting subsequent lines of *Ls* by *W + 1* spaces, is a list - item with *Bs* as its contents. - If a line is empty, then it need not be indented. The type of the - list item (bullet or ordered) is determined by the type of its list - marker. If the list item is ordered, then it is also assigned a - start number, based on the ordered list marker. - -Here are some list items that start with a blank line but are not empty: - -```````````````````````````````` example -- - foo -- - ``` - bar - ``` -- - baz -. -
      -
    • foo
    • -
    • -
      bar
      -
      -
    • -
    • -
      baz
      -
      -
    • -
    -```````````````````````````````` - -When the list item starts with a blank line, the number of spaces -following the list marker doesn't change the required indentation: - -```````````````````````````````` example -- - foo -. -
      -
    • foo
    • -
    -```````````````````````````````` - - -A list item can begin with at most one blank line. -In the following example, `foo` is not part of the list -item: - -```````````````````````````````` example -- - - foo -. -
      -
    • -
    -

    foo

    -```````````````````````````````` - - -Here is an empty bullet list item: - -```````````````````````````````` example -- foo -- -- bar -. -
      -
    • foo
    • -
    • -
    • bar
    • -
    -```````````````````````````````` - - -It does not matter whether there are spaces following the [list marker]: - -```````````````````````````````` example -- foo -- -- bar -. -
      -
    • foo
    • -
    • -
    • bar
    • -
    -```````````````````````````````` - - -Here is an empty ordered list item: - -```````````````````````````````` example -1. foo -2. -3. bar -. -
      -
    1. foo
    2. -
    3. -
    4. bar
    5. -
    -```````````````````````````````` - - -A list may start or end with an empty list item: - -```````````````````````````````` example -* -. -
      -
    • -
    -```````````````````````````````` - -However, an empty list item cannot interrupt a paragraph: - -```````````````````````````````` example -foo -* - -foo -1. -. -

    foo -*

    -

    foo -1.

    -```````````````````````````````` - - -4. **Indentation.** If a sequence of lines *Ls* constitutes a list item - according to rule #1, #2, or #3, then the result of indenting each line - of *Ls* by 1-3 spaces (the same for each line) also constitutes a - list item with the same contents and attributes. If a line is - empty, then it need not be indented. - -Indented one space: - -```````````````````````````````` example - 1. A paragraph - with two lines. - - indented code - - > A block quote. -. -
      -
    1. -

      A paragraph -with two lines.

      -
      indented code
      -
      -
      -

      A block quote.

      -
      -
    2. -
    -```````````````````````````````` - - -Indented two spaces: - -```````````````````````````````` example - 1. A paragraph - with two lines. - - indented code - - > A block quote. -. -
      -
    1. -

      A paragraph -with two lines.

      -
      indented code
      -
      -
      -

      A block quote.

      -
      -
    2. -
    -```````````````````````````````` - - -Indented three spaces: - -```````````````````````````````` example - 1. A paragraph - with two lines. - - indented code - - > A block quote. -. -
      -
    1. -

      A paragraph -with two lines.

      -
      indented code
      -
      -
      -

      A block quote.

      -
      -
    2. -
    -```````````````````````````````` - - -Four spaces indent gives a code block: - -```````````````````````````````` example - 1. A paragraph - with two lines. - - indented code - - > A block quote. -. -
    1.  A paragraph
    -    with two lines.
    -
    -        indented code
    -
    -    > A block quote.
    -
    -```````````````````````````````` - - - -5. **Laziness.** If a string of lines *Ls* constitute a [list - item](#list-items) with contents *Bs*, then the result of deleting - some or all of the indentation from one or more lines in which the - next [non-whitespace character] after the indentation is - [paragraph continuation text] is a - list item with the same contents and attributes. The unindented - lines are called - [lazy continuation line](@)s. - -Here is an example with [lazy continuation lines]: - -```````````````````````````````` example - 1. A paragraph -with two lines. - - indented code - - > A block quote. -. -
      -
    1. -

      A paragraph -with two lines.

      -
      indented code
      -
      -
      -

      A block quote.

      -
      -
    2. -
    -```````````````````````````````` - - -Indentation can be partially deleted: - -```````````````````````````````` example - 1. A paragraph - with two lines. -. -
      -
    1. A paragraph -with two lines.
    2. -
    -```````````````````````````````` - - -These examples show how laziness can work in nested structures: - -```````````````````````````````` example -> 1. > Blockquote -continued here. -. -
    -
      -
    1. -
      -

      Blockquote -continued here.

      -
      -
    2. -
    -
    -```````````````````````````````` - - -```````````````````````````````` example -> 1. > Blockquote -> continued here. -. -
    -
      -
    1. -
      -

      Blockquote -continued here.

      -
      -
    2. -
    -
    -```````````````````````````````` - - - -6. **That's all.** Nothing that is not counted as a list item by rules - #1--5 counts as a [list item](#list-items). - -The rules for sublists follow from the general rules -[above][List items]. A sublist must be indented the same number -of spaces a paragraph would need to be in order to be included -in the list item. - -So, in this case we need two spaces indent: - -```````````````````````````````` example -- foo - - bar - - baz - - boo -. -
      -
    • foo -
        -
      • bar -
          -
        • baz -
            -
          • boo
          • -
          -
        • -
        -
      • -
      -
    • -
    -```````````````````````````````` - - -One is not enough: - -```````````````````````````````` example -- foo - - bar - - baz - - boo -. -
      -
    • foo
    • -
    • bar
    • -
    • baz
    • -
    • boo
    • -
    -```````````````````````````````` - - -Here we need four, because the list marker is wider: - -```````````````````````````````` example -10) foo - - bar -. -
      -
    1. foo -
        -
      • bar
      • -
      -
    2. -
    -```````````````````````````````` - - -Three is not enough: - -```````````````````````````````` example -10) foo - - bar -. -
      -
    1. foo
    2. -
    -
      -
    • bar
    • -
    -```````````````````````````````` - - -A list may be the first block in a list item: - -```````````````````````````````` example -- - foo -. -
      -
    • -
        -
      • foo
      • -
      -
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -1. - 2. foo -. -
      -
    1. -
        -
      • -
          -
        1. foo
        2. -
        -
      • -
      -
    2. -
    -```````````````````````````````` - - -A list item can contain a heading: - -```````````````````````````````` example -- # Foo -- Bar - --- - baz -. -
      -
    • -

      Foo

      -
    • -
    • -

      Bar

      -baz
    • -
    -```````````````````````````````` - - -### Motivation - -John Gruber's Markdown spec says the following about list items: - -1. "List markers typically start at the left margin, but may be indented - by up to three spaces. List markers must be followed by one or more - spaces or a tab." - -2. "To make lists look nice, you can wrap items with hanging indents.... - But if you don't want to, you don't have to." - -3. "List items may consist of multiple paragraphs. Each subsequent - paragraph in a list item must be indented by either 4 spaces or one - tab." - -4. "It looks nice if you indent every line of the subsequent paragraphs, - but here again, Markdown will allow you to be lazy." - -5. "To put a blockquote within a list item, the blockquote's `>` - delimiters need to be indented." - -6. "To put a code block within a list item, the code block needs to be - indented twice — 8 spaces or two tabs." - -These rules specify that a paragraph under a list item must be indented -four spaces (presumably, from the left margin, rather than the start of -the list marker, but this is not said), and that code under a list item -must be indented eight spaces instead of the usual four. They also say -that a block quote must be indented, but not by how much; however, the -example given has four spaces indentation. Although nothing is said -about other kinds of block-level content, it is certainly reasonable to -infer that *all* block elements under a list item, including other -lists, must be indented four spaces. This principle has been called the -*four-space rule*. - -The four-space rule is clear and principled, and if the reference -implementation `Markdown.pl` had followed it, it probably would have -become the standard. However, `Markdown.pl` allowed paragraphs and -sublists to start with only two spaces indentation, at least on the -outer level. 
Worse, its behavior was inconsistent: a sublist of an -outer-level list needed two spaces indentation, but a sublist of this -sublist needed three spaces. It is not surprising, then, that different -implementations of Markdown have developed very different rules for -determining what comes under a list item. (Pandoc and python-Markdown, -for example, stuck with Gruber's syntax description and the four-space -rule, while discount, redcarpet, marked, PHP Markdown, and others -followed `Markdown.pl`'s behavior more closely.) - -Unfortunately, given the divergences between implementations, there -is no way to give a spec for list items that will be guaranteed not -to break any existing documents. However, the spec given here should -correctly handle lists formatted with either the four-space rule or -the more forgiving `Markdown.pl` behavior, provided they are laid out -in a way that is natural for a human to read. - -The strategy here is to let the width and indentation of the list marker -determine the indentation necessary for blocks to fall under the list -item, rather than having a fixed and arbitrary number. The writer can -think of the body of the list item as a unit which gets indented to the -right enough to fit the list marker (and any indentation on the list -marker). (The laziness rule, #5, then allows continuation lines to be -unindented if needed.) - -This rule is superior, we claim, to any rule requiring a fixed level of -indentation from the margin. The four-space rule is clear but -unnatural. It is quite unintuitive that - -``` markdown -- foo - - bar - - - baz -``` - -should be parsed as two lists with an intervening paragraph, - -``` html -
      -
    • foo
    • -
    -

    bar

    -
      -
    • baz
    • -
    -``` - -as the four-space rule demands, rather than a single list, - -``` html -
      -
    • -

      foo

      -

      bar

      -
        -
      • baz
      • -
      -
    • -
    -``` - -The choice of four spaces is arbitrary. It can be learned, but it is -not likely to be guessed, and it trips up beginners regularly. - -Would it help to adopt a two-space rule? The problem is that such -a rule, together with the rule allowing 1--3 spaces indentation of the -initial list marker, allows text that is indented *less than* the -original list marker to be included in the list item. For example, -`Markdown.pl` parses - -``` markdown - - one - - two -``` - -as a single list item, with `two` a continuation paragraph: - -``` html -
      -
    • -

      one

      -

      two

      -
    • -
    -``` - -and similarly - -``` markdown -> - one -> -> two -``` - -as - -``` html -
    -
      -
    • -

      one

      -

      two

      -
    • -
    -
    -``` - -This is extremely unintuitive. - -Rather than requiring a fixed indent from the margin, we could require -a fixed indent (say, two spaces, or even one space) from the list marker (which -may itself be indented). This proposal would remove the last anomaly -discussed. Unlike the spec presented above, it would count the following -as a list item with a subparagraph, even though the paragraph `bar` -is not indented as far as the first paragraph `foo`: - -``` markdown - 10. foo - - bar -``` - -Arguably this text does read like a list item with `bar` as a subparagraph, -which may count in favor of the proposal. However, on this proposal indented -code would have to be indented six spaces after the list marker. And this -would break a lot of existing Markdown, which has the pattern: - -``` markdown -1. foo - - indented code -``` - -where the code is indented eight spaces. The spec above, by contrast, will -parse this text as expected, since the code block's indentation is measured -from the beginning of `foo`. - -The one case that needs special treatment is a list item that *starts* -with indented code. How much indentation is required in that case, since -we don't have a "first paragraph" to measure from? Rule #2 simply stipulates -that in such cases, we require one space indentation from the list marker -(and then the normal four spaces for the indented code). This will match the -four-space rule in cases where the list marker plus its initial indentation -takes four spaces (a common case), but diverge in other cases. - -## Lists - -A [list](@) is a sequence of one or more -list items [of the same type]. The list items -may be separated by any number of blank lines. - -Two list items are [of the same type](@) -if they begin with a [list marker] of the same type. 
-Two list markers are of the -same type if (a) they are bullet list markers using the same character -(`-`, `+`, or `*`) or (b) they are ordered list numbers with the same -delimiter (either `.` or `)`). - -A list is an [ordered list](@) -if its constituent list items begin with -[ordered list markers], and a -[bullet list](@) if its constituent list -items begin with [bullet list markers]. - -The [start number](@) -of an [ordered list] is determined by the list number of -its initial list item. The numbers of subsequent list items are -disregarded. - -A list is [loose](@) if any of its constituent -list items are separated by blank lines, or if any of its constituent -list items directly contain two block-level elements with a blank line -between them. Otherwise a list is [tight](@). -(The difference in HTML output is that paragraphs in a loose list are -wrapped in `

    ` tags, while paragraphs in a tight list are not.) - -Changing the bullet or ordered list delimiter starts a new list: - -```````````````````````````````` example -- foo -- bar -+ baz -. -

      -
    • foo
    • -
    • bar
    • -
    -
      -
    • baz
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -1. foo -2. bar -3) baz -. -
      -
    1. foo
    2. -
    3. bar
    4. -
    -
      -
    1. baz
    2. -
    -```````````````````````````````` - - -In CommonMark, a list can interrupt a paragraph. That is, -no blank line is needed to separate a paragraph from a following -list: - -```````````````````````````````` example -Foo -- bar -- baz -. -

    Foo

    -
      -
    • bar
    • -
    • baz
    • -
    -```````````````````````````````` - -`Markdown.pl` does not allow this, through fear of triggering a list -via a numeral in a hard-wrapped line: - -``` markdown -The number of windows in my house is -14. The number of doors is 6. -``` - -Oddly, though, `Markdown.pl` *does* allow a blockquote to -interrupt a paragraph, even though the same considerations might -apply. - -In CommonMark, we do allow lists to interrupt paragraphs, for -two reasons. First, it is natural and not uncommon for people -to start lists without blank lines: - -``` markdown -I need to buy -- new shoes -- a coat -- a plane ticket -``` - -Second, we are attracted to a - -> [principle of uniformity](@): -> if a chunk of text has a certain -> meaning, it will continue to have the same meaning when put into a -> container block (such as a list item or blockquote). - -(Indeed, the spec for [list items] and [block quotes] presupposes -this principle.) This principle implies that if - -``` markdown - * I need to buy - - new shoes - - a coat - - a plane ticket -``` - -is a list item containing a paragraph followed by a nested sublist, -as all Markdown implementations agree it is (though the paragraph -may be rendered without `

    ` tags, since the list is "tight"), -then - -``` markdown -I need to buy -- new shoes -- a coat -- a plane ticket -``` - -by itself should be a paragraph followed by a nested sublist. - -Since it is well established Markdown practice to allow lists to -interrupt paragraphs inside list items, the [principle of -uniformity] requires us to allow this outside list items as -well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) -takes a different approach, requiring blank lines before lists -even inside other list items.) - -In order to solve the problem of unwanted lists in paragraphs with -hard-wrapped numerals, we allow only lists starting with `1` to -interrupt paragraphs. Thus, - -```````````````````````````````` example -The number of windows in my house is -14. The number of doors is 6. -. -

    The number of windows in my house is -14. The number of doors is 6.

    -```````````````````````````````` - -We may still get an unintended result in cases like - -```````````````````````````````` example -The number of windows in my house is -1. The number of doors is 6. -. -

    The number of windows in my house is

    -
      -
    1. The number of doors is 6.
    2. -
    -```````````````````````````````` - -but this rule should prevent most spurious list captures. - -There can be any number of blank lines between items: - -```````````````````````````````` example -- foo - -- bar - - -- baz -. -
      -
    • -

      foo

      -
    • -
    • -

      bar

      -
    • -
    • -

      baz

      -
    • -
    -```````````````````````````````` - -```````````````````````````````` example -- foo - - bar - - baz - - - bim -. -
      -
    • foo -
        -
      • bar -
          -
        • -

          baz

          -

          bim

          -
        • -
        -
      • -
      -
    • -
    -```````````````````````````````` - - -To separate consecutive lists of the same type, or to separate a -list from an indented code block that would otherwise be parsed -as a subparagraph of the final list item, you can insert a blank HTML -comment: - -```````````````````````````````` example -- foo -- bar - - - -- baz -- bim -. -
      -
    • foo
    • -
    • bar
    • -
    - -
      -
    • baz
    • -
    • bim
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -- foo - - notcode - -- foo - - - - code -. -
      -
    • -

      foo

      -

      notcode

      -
    • -
    • -

      foo

      -
    • -
    - -
    code
    -
    -```````````````````````````````` - - -List items need not be indented to the same level. The following -list items will be treated as items at the same list level, -since none is indented enough to belong to the previous list -item: - -```````````````````````````````` example -- a - - b - - c - - d - - e - - f -- g -. -
      -
    • a
    • -
    • b
    • -
    • c
    • -
    • d
    • -
    • e
    • -
    • f
    • -
    • g
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -1. a - - 2. b - - 3. c -. -
      -
    1. -

      a

      -
    2. -
    3. -

      b

      -
    4. -
    5. -

      c

      -
    6. -
    -```````````````````````````````` - -Note, however, that list items may not be indented more than -three spaces. Here `- e` is treated as a paragraph continuation -line, because it is indented more than three spaces: - -```````````````````````````````` example -- a - - b - - c - - d - - e -. -
      -
    • a
    • -
    • b
    • -
    • c
    • -
    • d -- e
    • -
    -```````````````````````````````` - -And here, `3. c` is treated as an indented code block, -because it is indented four spaces and preceded by a -blank line. - -```````````````````````````````` example -1. a - - 2. b - - 3. c -. -
      -
    1. -

      a

      -
    2. -
    3. -

      b

      -
    4. -
    -
    3. c
    -
    -```````````````````````````````` - - -This is a loose list, because there is a blank line between -two of the list items: - -```````````````````````````````` example -- a -- b - -- c -. -
      -
    • -

      a

      -
    • -
    • -

      b

      -
    • -
    • -

      c

      -
    • -
    -```````````````````````````````` - - -So is this, with an empty second item: - -```````````````````````````````` example -* a -* - -* c -. -
      -
    • -

      a

      -
    • -
    • -
    • -

      c

      -
    • -
    -```````````````````````````````` - - -These are loose lists, even though there is no space between the items, -because one of the items directly contains two block-level elements -with a blank line between them: - -```````````````````````````````` example -- a -- b - - c -- d -. -
      -
    • -

      a

      -
    • -
    • -

      b

      -

      c

      -
    • -
    • -

      d

      -
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -- a -- b - - [ref]: /url -- d -. -
      -
    • -

      a

      -
    • -
    • -

      b

      -
    • -
    • -

      d

      -
    • -
    -```````````````````````````````` - - -This is a tight list, because the blank lines are in a code block: - -```````````````````````````````` example -- a -- ``` - b - - - ``` -- c -. -
      -
    • a
    • -
    • -
      b
      -
      -
      -
      -
    • -
    • c
    • -
    -```````````````````````````````` - - -This is a tight list, because the blank line is between two -paragraphs of a sublist. So the sublist is loose while -the outer list is tight: - -```````````````````````````````` example -- a - - b - - c -- d -. -
      -
    • a -
        -
      • -

        b

        -

        c

        -
      • -
      -
    • -
    • d
    • -
    -```````````````````````````````` - - -This is a tight list, because the blank line is inside the -block quote: - -```````````````````````````````` example -* a - > b - > -* c -. -
      -
    • a -
      -

      b

      -
      -
    • -
    • c
    • -
    -```````````````````````````````` - - -This list is tight, because the consecutive block elements -are not separated by blank lines: - -```````````````````````````````` example -- a - > b - ``` - c - ``` -- d -. -
      -
    • a -
      -

      b

      -
      -
      c
      -
      -
    • -
    • d
    • -
    -```````````````````````````````` - - -A single-paragraph list is tight: - -```````````````````````````````` example -- a -. -
      -
    • a
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -- a - - b -. -
      -
    • a -
        -
      • b
      • -
      -
    • -
    -```````````````````````````````` - - -This list is loose, because of the blank line between the -two block elements in the list item: - -```````````````````````````````` example -1. ``` - foo - ``` - - bar -. -
      -
    1. -
      foo
      -
      -

      bar

      -
    2. -
    -```````````````````````````````` - - -Here the outer list is loose, the inner list tight: - -```````````````````````````````` example -* foo - * bar - - baz -. -
      -
    • -

      foo

      -
        -
      • bar
      • -
      -

      baz

      -
    • -
    -```````````````````````````````` - - -```````````````````````````````` example -- a - - b - - c - -- d - - e - - f -. -
      -
    • -

      a

      -
        -
      • b
      • -
      • c
      • -
      -
    • -
    • -

      d

      -
        -
      • e
      • -
      • f
      • -
      -
    • -
    -```````````````````````````````` - - -# Inlines - -Inlines are parsed sequentially from the beginning of the character -stream to the end (left to right, in left-to-right languages). -Thus, for example, in - -```````````````````````````````` example -`hi`lo` -. -

    hilo`

    -```````````````````````````````` - -`hi` is parsed as code, leaving the backtick at the end as a literal -backtick. - - -## Backslash escapes - -Any ASCII punctuation character may be backslash-escaped: - -```````````````````````````````` example -\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ -. -

    !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

    -```````````````````````````````` - - -Backslashes before other characters are treated as literal -backslashes: - -```````````````````````````````` example -\→\A\a\ \3\φ\« -. -

    \→\A\a\ \3\φ\«

    -```````````````````````````````` - - -Escaped characters are treated as regular characters and do -not have their usual Markdown meanings: - -```````````````````````````````` example -\*not emphasized* -\<br/> not a tag -\[not a link](/foo) -\`not code` -1\. not a list -\* not a list -\# not a heading -\[foo]: /url "not a reference" -\&ouml; not a character entity -. -

    *not emphasized* -<br/> not a tag -[not a link](/foo) -`not code` -1. not a list -* not a list -# not a heading -[foo]: /url "not a reference" -&ouml; not a character entity

    -```````````````````````````````` - - -If a backslash is itself escaped, the following character is not: - -```````````````````````````````` example -\\*emphasis* -. -

    \emphasis

    -```````````````````````````````` - - -A backslash at the end of the line is a [hard line break]: - -```````````````````````````````` example -foo\ -bar -. -

    foo
    -bar

    -```````````````````````````````` - - -Backslash escapes do not work in code blocks, code spans, autolinks, or -raw HTML: - -```````````````````````````````` example -`` \[\` `` -. -

    \[\`

    -```````````````````````````````` - - -```````````````````````````````` example - \[\] -. -
    \[\]
    -
    -```````````````````````````````` - - -```````````````````````````````` example -~~~ -\[\] -~~~ -. -
    \[\]
    -
    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    http://example.com?find=\*

    -```````````````````````````````` - - -```````````````````````````````` example - -. - -```````````````````````````````` - - -But they work in all other contexts, including URLs and link titles, -link references, and [info strings] in [fenced code blocks]: - -```````````````````````````````` example -[foo](/bar\* "ti\*tle") -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -[foo] - -[foo]: /bar\* "ti\*tle" -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -``` foo\+bar -foo -``` -. -
    foo
    -
    -```````````````````````````````` - - - -## Entity and numeric character references - -Valid HTML entity references and numeric character references -can be used in place of the corresponding Unicode character, -with the following exceptions: - -- Entity and character references are not recognized in code - blocks and code spans. - -- Entity and character references cannot stand in place of - special characters that define structural elements in - CommonMark. For example, although `&#42;` can be used - in place of a literal `*` character, `&#42;` cannot replace - `*` in emphasis delimiters, bullet list markers, or thematic - breaks. - -Conforming CommonMark parsers need not store information about -whether a particular character was represented in the source -using a Unicode character or an entity reference. - -[Entity references](@) consist of `&` + any of the valid -HTML5 entity names + `;`. The -document <https://html.spec.whatwg.org/multipage/entities.json> -is used as an authoritative source for the valid entity -references and their corresponding code points. - -```````````````````````````````` example -&nbsp; &amp; &copy; &AElig; &Dcaron; -&frac34; &HilbertSpace; &DifferentialD; -&ClockwiseContourIntegral; &ngE; -. -

      & © Æ Ď -¾ ℋ ⅆ -∲ ≧̸

    -```````````````````````````````` - - -[Decimal numeric character -references](@) -consist of `&#` + a string of 1--7 arabic digits + `;`. A -numeric character reference is parsed as the corresponding -Unicode character. Invalid Unicode code points will be replaced by -the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, -the code point `U+0000` will also be replaced by `U+FFFD`. - -```````````````````````````````` example -# Ӓ Ϡ � -. -

    # Ӓ Ϡ �

    -```````````````````````````````` - - -[Hexadecimal numeric character -references](@) consist of `&#` + -either `X` or `x` + a string of 1-6 hexadecimal digits + `;`. -They too are parsed as the corresponding Unicode character (this -time specified with a hexadecimal numeral instead of decimal). - -```````````````````````````````` example -" ആ ಫ -. -

    " ആ ಫ

    -```````````````````````````````` - - -Here are some nonentities: - -```````````````````````````````` example -  &x; &#; &#x; -� -&#abcdef0; -&ThisIsNotDefined; &hi?; -. -

    &nbsp &x; &#; &#x; -&#987654321; -&#abcdef0; -&ThisIsNotDefined; &hi?;

    -```````````````````````````````` - - -Although HTML5 does accept some entity references -without a trailing semicolon (such as `©`), these are not -recognized here, because it makes the grammar too ambiguous: - -```````````````````````````````` example -© -. -

    &copy

    -```````````````````````````````` - - -Strings that are not on the list of HTML5 named entities are not -recognized as entity references either: - -```````````````````````````````` example -&MadeUpEntity; -. -

    &MadeUpEntity;

    -```````````````````````````````` - - -Entity and numeric character references are recognized in any -context besides code spans or code blocks, including -URLs, [link titles], and [fenced code block][] [info strings]: - -```````````````````````````````` example - -. - -```````````````````````````````` - - -```````````````````````````````` example -[foo](/föö "föö") -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -[foo] - -[foo]: /föö "föö" -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -``` föö -foo -``` -. -
    foo
    -
    -```````````````````````````````` - - -Entity and numeric character references are treated as literal -text in code spans and code blocks: - -```````````````````````````````` example -`föö` -. -

    f&ouml;&ouml;

    -```````````````````````````````` - - -```````````````````````````````` example - föfö -. -
    f&ouml;f&ouml;
    -
    -```````````````````````````````` - - -Entity and numeric character references cannot be used -in place of symbols indicating structure in CommonMark -documents. - -```````````````````````````````` example -*foo* -*foo* -. -

    *foo* -foo

    -```````````````````````````````` - -```````````````````````````````` example -* foo - -* foo -. -

    * foo

    -
      -
    • foo
    • -
    -```````````````````````````````` - -```````````````````````````````` example -foo bar -. -

    foo - -bar

    -```````````````````````````````` - -```````````````````````````````` example - foo -. -

    →foo

    -```````````````````````````````` - - -```````````````````````````````` example -[a](url "tit") -. -

    [a](url "tit")

    -```````````````````````````````` - - -## Code spans - -A [backtick string](@) -is a string of one or more backtick characters (`` ` ``) that is neither -preceded nor followed by a backtick. - -A [code span](@) begins with a backtick string and ends with -a backtick string of equal length. The contents of the code span are -the characters between the two backtick strings, normalized in the -following ways: - -- First, [line endings] are converted to [spaces]. -- If the resulting string both begins *and* ends with a [space] - character, but does not consist entirely of [space] - characters, a single [space] character is removed from the - front and back. This allows you to include code that begins - or ends with backtick characters, which must be separated by - whitespace from the opening or closing backtick strings. - -This is a simple code span: - -```````````````````````````````` example -`foo` -. -

    foo

    -```````````````````````````````` - - -Here two backticks are used, because the code contains a backtick. -This example also illustrates stripping of a single leading and -trailing space: - -```````````````````````````````` example -`` foo ` bar `` -. -

    foo ` bar

    -```````````````````````````````` - - -This example shows the motivation for stripping leading and trailing -spaces: - -```````````````````````````````` example -` `` ` -. -

    ``

    -```````````````````````````````` - -Note that only *one* space is stripped: - -```````````````````````````````` example -` `` ` -. -

    ``

    -```````````````````````````````` - -The stripping only happens if the space is on both -sides of the string: - -```````````````````````````````` example -` a` -. -

    a

    -```````````````````````````````` - -Only [spaces], and not [unicode whitespace] in general, are -stripped in this way: - -```````````````````````````````` example -` b ` -. -

     b 

    -```````````````````````````````` - -No stripping occurs if the code span contains only spaces: - -```````````````````````````````` example -` ` -` ` -. -

      -

    -```````````````````````````````` - - -[Line endings] are treated like spaces: - -```````````````````````````````` example -`` -foo -bar -baz -`` -. -

    foo bar baz

    -```````````````````````````````` - -```````````````````````````````` example -`` -foo -`` -. -

    foo

    -```````````````````````````````` - - -Interior spaces are not collapsed: - -```````````````````````````````` example -`foo bar -baz` -. -

    foo bar baz

    -```````````````````````````````` - -Note that browsers will typically collapse consecutive spaces -when rendering `` elements, so it is recommended that -the following CSS be used: - - code{white-space: pre-wrap;} - - -Note that backslash escapes do not work in code spans. All backslashes -are treated literally: - -```````````````````````````````` example -`foo\`bar` -. -

    foo\bar`

    -```````````````````````````````` - - -Backslash escapes are never needed, because one can always choose a -string of *n* backtick characters as delimiters, where the code does -not contain any strings of exactly *n* backtick characters. - -```````````````````````````````` example -``foo`bar`` -. -

    foo`bar

    -```````````````````````````````` - -```````````````````````````````` example -` foo `` bar ` -. -

    foo `` bar

    -```````````````````````````````` - - -Code span backticks have higher precedence than any other inline -constructs except HTML tags and autolinks. Thus, for example, this is -not parsed as emphasized text, since the second `*` is part of a code -span: - -```````````````````````````````` example -*foo`*` -. -

    *foo*

    -```````````````````````````````` - - -And this is not parsed as a link: - -```````````````````````````````` example -[not a `link](/foo`) -. -

    [not a link](/foo)

    -```````````````````````````````` - - -Code spans, HTML tags, and autolinks have the same precedence. -Thus, this is code: - -```````````````````````````````` example -`` -. -

    <a href="">`

    -```````````````````````````````` - - -But this is an HTML tag: - -```````````````````````````````` example -
    ` -. -

    `

    -```````````````````````````````` - - -And this is code: - -```````````````````````````````` example -`` -. -

    <http://foo.bar.baz>`

    -```````````````````````````````` - - -But this is an autolink: - -```````````````````````````````` example -` -. -

    http://foo.bar.`baz`

    -```````````````````````````````` - - -When a backtick string is not closed by a matching backtick string, -we just have literal backticks: - -```````````````````````````````` example -```foo`` -. -

    ```foo``

    -```````````````````````````````` - - -```````````````````````````````` example -`foo -. -

    `foo

    -```````````````````````````````` - -The following case also illustrates the need for opening and -closing backtick strings to be equal in length: - -```````````````````````````````` example -`foo``bar`` -. -

    `foobar

    -```````````````````````````````` - - -## Emphasis and strong emphasis - -John Gruber's original [Markdown syntax -description](http://daringfireball.net/projects/markdown/syntax#em) says: - -> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of -> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML -> `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML `<strong>` -> tag. - -This is enough for most users, but these rules leave much undecided, -especially when it comes to nested emphasis. The original -`Markdown.pl` test suite makes it clear that triple `***` and -`___` delimiters can be used for strong emphasis, and most -implementations have also allowed the following patterns: - -``` markdown -***strong emph*** -***strong** in emph* -***emph* in strong** -**in strong *emph*** -*in emph **strong*** -``` - -The following patterns are less widely supported, but the intent -is clear and they are useful (especially in contexts like bibliography -entries): - -``` markdown -*emph *with emph* in it* -**strong **with strong** in it** -``` - -Many implementations have also restricted intraword emphasis to -the `*` forms, to avoid unwanted emphasis in words containing -internal underscores. (It is best practice to put these in code -spans, but users often do not.) - -``` markdown -internal emphasis: foo*bar*baz -no emphasis: foo_bar_baz -``` - -The rules given below capture all of these patterns, while allowing -for efficient parsing strategies that do not backtrack. - -First, some definitions. A [delimiter run](@) is either -a sequence of one or more `*` characters that is not preceded or -followed by a non-backslash-escaped `*` character, or a sequence -of one or more `_` characters that is not preceded or followed by -a non-backslash-escaped `_` character.
- -A [left-flanking delimiter run](@) is -a [delimiter run] that is (1) not followed by [Unicode whitespace], -and either (2a) not followed by a [punctuation character], or -(2b) followed by a [punctuation character] and -preceded by [Unicode whitespace] or a [punctuation character]. -For purposes of this definition, the beginning and the end of -the line count as Unicode whitespace. - -A [right-flanking delimiter run](@) is -a [delimiter run] that is (1) not preceded by [Unicode whitespace], -and either (2a) not preceded by a [punctuation character], or -(2b) preceded by a [punctuation character] and -followed by [Unicode whitespace] or a [punctuation character]. -For purposes of this definition, the beginning and the end of -the line count as Unicode whitespace. - -Here are some examples of delimiter runs. - - - left-flanking but not right-flanking: - - ``` - ***abc - _abc - **"abc" - _"abc" - ``` - - - right-flanking but not left-flanking: - - ``` - abc*** - abc_ - "abc"** - "abc"_ - ``` - - - Both left and right-flanking: - - ``` - abc***def - "abc"_"def" - ``` - - - Neither left nor right-flanking: - - ``` - abc *** def - a _ b - ``` - -(The idea of distinguishing left-flanking and right-flanking -delimiter runs based on the character before and the character -after comes from Roopesh Chander's -[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). -vfmd uses the terminology "emphasis indicator string" instead of "delimiter -run," and its rules for distinguishing left- and right-flanking runs -are a bit more complex than the ones given here.) - -The following rules define emphasis and strong emphasis: - -1. A single `*` character [can open emphasis](@) - iff (if and only if) it is part of a [left-flanking delimiter run]. - -2. 
A single `_` character [can open emphasis] iff - it is part of a [left-flanking delimiter run] - and either (a) not part of a [right-flanking delimiter run] - or (b) part of a [right-flanking delimiter run] - preceded by punctuation. - -3. A single `*` character [can close emphasis](@) - iff it is part of a [right-flanking delimiter run]. - -4. A single `_` character [can close emphasis] iff - it is part of a [right-flanking delimiter run] - and either (a) not part of a [left-flanking delimiter run] - or (b) part of a [left-flanking delimiter run] - followed by punctuation. - -5. A double `**` [can open strong emphasis](@) - iff it is part of a [left-flanking delimiter run]. - -6. A double `__` [can open strong emphasis] iff - it is part of a [left-flanking delimiter run] - and either (a) not part of a [right-flanking delimiter run] - or (b) part of a [right-flanking delimiter run] - preceded by punctuation. - -7. A double `**` [can close strong emphasis](@) - iff it is part of a [right-flanking delimiter run]. - -8. A double `__` [can close strong emphasis] iff - it is part of a [right-flanking delimiter run] - and either (a) not part of a [left-flanking delimiter run] - or (b) part of a [left-flanking delimiter run] - followed by punctuation. - -9. Emphasis begins with a delimiter that [can open emphasis] and ends - with a delimiter that [can close emphasis], and that uses the same - character (`_` or `*`) as the opening delimiter. The - opening and closing delimiters must belong to separate - [delimiter runs]. If one of the delimiters can both - open and close emphasis, then the sum of the lengths of the - delimiter runs containing the opening and closing delimiters - must not be a multiple of 3 unless both lengths are - multiples of 3. - -10. Strong emphasis begins with a delimiter that - [can open strong emphasis] and ends with a delimiter that - [can close strong emphasis], and that uses the same character - (`_` or `*`) as the opening delimiter. 
The - opening and closing delimiters must belong to separate - [delimiter runs]. If one of the delimiters can both open - and close strong emphasis, then the sum of the lengths of - the delimiter runs containing the opening and closing - delimiters must not be a multiple of 3 unless both lengths - are multiples of 3. - -11. A literal `*` character cannot occur at the beginning or end of - `*`-delimited emphasis or `**`-delimited strong emphasis, unless it - is backslash-escaped. - -12. A literal `_` character cannot occur at the beginning or end of - `_`-delimited emphasis or `__`-delimited strong emphasis, unless it - is backslash-escaped. - -Where rules 1--12 above are compatible with multiple parsings, -the following principles resolve ambiguity: - -13. The number of nestings should be minimized. Thus, for example, - an interpretation `<strong>...</strong>` is always preferred to - `<em><em>...</em></em>`. - -14. An interpretation `<em><strong>...</strong></em>` is always - preferred to `<strong><em>...</em></strong>`. - -15. When two potential emphasis or strong emphasis spans overlap, - so that the second begins before the first ends and ends after - the first ends, the first takes precedence. Thus, for example, - `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather - than `*foo <em>bar* baz</em>`. - -16. When there are two potential emphasis or strong emphasis spans - with the same closing delimiter, the shorter one (the one that - opens later) takes precedence. Thus, for example, - `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>` - rather than `<strong>foo **bar baz</strong>`. - -17. Inline code spans, links, images, and HTML tags group more tightly - than emphasis. So, when there is a choice between an interpretation - that contains one of these elements and one that does not, the - former always wins. Thus, for example, `*[foo*](bar)` is - parsed as `*<a href="bar">foo*</a>` rather than as - `<em>[foo</em>](bar)`. - -These rules can be illustrated through a series of examples. - -Rule 1: - -```````````````````````````````` example -*foo bar* -. -

    foo bar

    -```````````````````````````````` - - -This is not emphasis, because the opening `*` is followed by -whitespace, and hence not part of a [left-flanking delimiter run]: - -```````````````````````````````` example -a * foo bar* -. -

    a * foo bar*

    -```````````````````````````````` - - -This is not emphasis, because the opening `*` is preceded -by an alphanumeric and followed by punctuation, and hence -not part of a [left-flanking delimiter run]: - -```````````````````````````````` example -a*"foo"* -. -

    a*"foo"*

    -```````````````````````````````` - - -Unicode nonbreaking spaces count as whitespace, too: - -```````````````````````````````` example -* a * -. -

    * a *

    -```````````````````````````````` - - -Intraword emphasis with `*` is permitted: - -```````````````````````````````` example -foo*bar* -. -

    foobar

    -```````````````````````````````` - - -```````````````````````````````` example -5*6*78 -. -

    5678

    -```````````````````````````````` - - -Rule 2: - -```````````````````````````````` example -_foo bar_ -. -

    foo bar

    -```````````````````````````````` - - -This is not emphasis, because the opening `_` is followed by -whitespace: - -```````````````````````````````` example -_ foo bar_ -. -

    _ foo bar_

    -```````````````````````````````` - - -This is not emphasis, because the opening `_` is preceded -by an alphanumeric and followed by punctuation: - -```````````````````````````````` example -a_"foo"_ -. -

    a_"foo"_

    -```````````````````````````````` - - -Emphasis with `_` is not allowed inside words: - -```````````````````````````````` example -foo_bar_ -. -

    foo_bar_

    -```````````````````````````````` - - -```````````````````````````````` example -5_6_78 -. -

    5_6_78

    -```````````````````````````````` - - -```````````````````````````````` example -пристаням_стремятся_ -. -

    пристаням_стремятся_

    -```````````````````````````````` - - -Here `_` does not generate emphasis, because the first delimiter run -is right-flanking and the second left-flanking: - -```````````````````````````````` example -aa_"bb"_cc -. -

    aa_"bb"_cc

    -```````````````````````````````` - - -This is emphasis, even though the opening delimiter is -both left- and right-flanking, because it is preceded by -punctuation: - -```````````````````````````````` example -foo-_(bar)_ -. -

    foo-(bar)

    -```````````````````````````````` - - -Rule 3: - -This is not emphasis, because the closing delimiter does -not match the opening delimiter: - -```````````````````````````````` example -_foo* -. -

    _foo*

    -```````````````````````````````` - - -This is not emphasis, because the closing `*` is preceded by -whitespace: - -```````````````````````````````` example -*foo bar * -. -

    *foo bar *

    -```````````````````````````````` - - -A newline also counts as whitespace: - -```````````````````````````````` example -*foo bar -* -. -

    *foo bar -*

    -```````````````````````````````` - - -This is not emphasis, because the second `*` is -preceded by punctuation and followed by an alphanumeric -(hence it is not part of a [right-flanking delimiter run]): - -```````````````````````````````` example -*(*foo) -. -

    *(*foo)

    -```````````````````````````````` - - -The point of this restriction is more easily appreciated -with this example: - -```````````````````````````````` example -*(*foo*)* -. -

    (foo)

    -```````````````````````````````` - - -Intraword emphasis with `*` is allowed: - -```````````````````````````````` example -*foo*bar -. -

    foobar

    -```````````````````````````````` - - - -Rule 4: - -This is not emphasis, because the closing `_` is preceded by -whitespace: - -```````````````````````````````` example -_foo bar _ -. -

    _foo bar _

    -```````````````````````````````` - - -This is not emphasis, because the second `_` is -preceded by punctuation and followed by an alphanumeric: - -```````````````````````````````` example -_(_foo) -. -

    _(_foo)

    -```````````````````````````````` - - -This is emphasis within emphasis: - -```````````````````````````````` example -_(_foo_)_ -. -

    (foo)

    -```````````````````````````````` - - -Intraword emphasis is disallowed for `_`: - -```````````````````````````````` example -_foo_bar -. -

    _foo_bar

    -```````````````````````````````` - - -```````````````````````````````` example -_пристаням_стремятся -. -

    _пристаням_стремятся

    -```````````````````````````````` - - -```````````````````````````````` example -_foo_bar_baz_ -. -

    foo_bar_baz

    -```````````````````````````````` - - -This is emphasis, even though the closing delimiter is -both left- and right-flanking, because it is followed by -punctuation: - -```````````````````````````````` example -_(bar)_. -. -

    (bar).

    -```````````````````````````````` - - -Rule 5: - -```````````````````````````````` example -**foo bar** -. -

    foo bar

    -```````````````````````````````` - - -This is not strong emphasis, because the opening delimiter is -followed by whitespace: - -```````````````````````````````` example -** foo bar** -. -

    ** foo bar**

    -```````````````````````````````` - - -This is not strong emphasis, because the opening `**` is preceded -by an alphanumeric and followed by punctuation, and hence -not part of a [left-flanking delimiter run]: - -```````````````````````````````` example -a**"foo"** -. -

    a**"foo"**

    -```````````````````````````````` - - -Intraword strong emphasis with `**` is permitted: - -```````````````````````````````` example -foo**bar** -. -

    foobar

    -```````````````````````````````` - - -Rule 6: - -```````````````````````````````` example -__foo bar__ -. -

    foo bar

    -```````````````````````````````` - - -This is not strong emphasis, because the opening delimiter is -followed by whitespace: - -```````````````````````````````` example -__ foo bar__ -. -

    __ foo bar__

    -```````````````````````````````` - - -A newline counts as whitespace: -```````````````````````````````` example -__ -foo bar__ -. -

    __ -foo bar__

    -```````````````````````````````` - - -This is not strong emphasis, because the opening `__` is preceded -by an alphanumeric and followed by punctuation: - -```````````````````````````````` example -a__"foo"__ -. -

    a__"foo"__

    -```````````````````````````````` - - -Intraword strong emphasis is forbidden with `__`: - -```````````````````````````````` example -foo__bar__ -. -

    foo__bar__

    -```````````````````````````````` - - -```````````````````````````````` example -5__6__78 -. -

    5__6__78

    -```````````````````````````````` - - -```````````````````````````````` example -пристаням__стремятся__ -. -

    пристаням__стремятся__

    -```````````````````````````````` - - -```````````````````````````````` example -__foo, __bar__, baz__ -. -

    foo, bar, baz

    -```````````````````````````````` - - -This is strong emphasis, even though the opening delimiter is -both left- and right-flanking, because it is preceded by -punctuation: - -```````````````````````````````` example -foo-__(bar)__ -. -

    foo-(bar)

    -```````````````````````````````` - - - -Rule 7: - -This is not strong emphasis, because the closing delimiter is preceded -by whitespace: - -```````````````````````````````` example -**foo bar ** -. -

    **foo bar **

    -```````````````````````````````` - - -(Nor can it be interpreted as an emphasized `*foo bar *`, because of -Rule 11.) - -This is not strong emphasis, because the second `**` is -preceded by punctuation and followed by an alphanumeric: - -```````````````````````````````` example -**(**foo) -. -

    **(**foo)

    -```````````````````````````````` - - -The point of this restriction is more easily appreciated -with these examples: - -```````````````````````````````` example -*(**foo**)* -. -

    (foo)

    -```````````````````````````````` - - -```````````````````````````````` example -**Gomphocarpus (*Gomphocarpus physocarpus*, syn. -*Asclepias physocarpa*)** -. -

    Gomphocarpus (Gomphocarpus physocarpus, syn. -Asclepias physocarpa)

    -```````````````````````````````` - - -```````````````````````````````` example -**foo "*bar*" foo** -. -

    foo "bar" foo

    -```````````````````````````````` - - -Intraword emphasis: - -```````````````````````````````` example -**foo**bar -. -

    foobar

    -```````````````````````````````` - - -Rule 8: - -This is not strong emphasis, because the closing delimiter is -preceded by whitespace: - -```````````````````````````````` example -__foo bar __ -. -

    __foo bar __

    -```````````````````````````````` - - -This is not strong emphasis, because the second `__` is -preceded by punctuation and followed by an alphanumeric: - -```````````````````````````````` example -__(__foo) -. -

    __(__foo)

    -```````````````````````````````` - - -The point of this restriction is more easily appreciated -with this example: - -```````````````````````````````` example -_(__foo__)_ -. -

    (foo)

    -```````````````````````````````` - - -Intraword strong emphasis is forbidden with `__`: - -```````````````````````````````` example -__foo__bar -. -

    __foo__bar

    -```````````````````````````````` - - -```````````````````````````````` example -__пристаням__стремятся -. -

    __пристаням__стремятся

    -```````````````````````````````` - - -```````````````````````````````` example -__foo__bar__baz__ -. -

    foo__bar__baz

    -```````````````````````````````` - - -This is strong emphasis, even though the closing delimiter is -both left- and right-flanking, because it is followed by -punctuation: - -```````````````````````````````` example -__(bar)__. -. -

    (bar).

    -```````````````````````````````` - - -Rule 9: - -Any nonempty sequence of inline elements can be the contents of an -emphasized span. - -```````````````````````````````` example -*foo [bar](/url)* -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -*foo -bar* -. -

    foo -bar

    -```````````````````````````````` - - -In particular, emphasis and strong emphasis can be nested -inside emphasis: - -```````````````````````````````` example -_foo __bar__ baz_ -. -

    foo bar baz

    -```````````````````````````````` - - -```````````````````````````````` example -_foo _bar_ baz_ -. -

    foo bar baz

    -```````````````````````````````` - - -```````````````````````````````` example -__foo_ bar_ -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -*foo *bar** -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -*foo **bar** baz* -. -

    foo bar baz

    -```````````````````````````````` - -```````````````````````````````` example -*foo**bar**baz* -. -

    foobarbaz

    -```````````````````````````````` - -Note that in the preceding case, the interpretation - -``` markdown -

    foobarbaz

    -``` - - -is precluded by the condition that a delimiter that -can both open and close (like the `*` after `foo`) -cannot form emphasis if the sum of the lengths of -the delimiter runs containing the opening and -closing delimiters is a multiple of 3 unless -both lengths are multiples of 3. - - -For the same reason, we don't get two consecutive -emphasis sections in this example: - -```````````````````````````````` example -*foo**bar* -. -

    foo**bar

    -```````````````````````````````` - - -The same condition ensures that the following -cases are all strong emphasis nested inside -emphasis, even when the interior spaces are -omitted: - - -```````````````````````````````` example -***foo** bar* -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -*foo **bar*** -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -*foo**bar*** -. -

    foobar

    -```````````````````````````````` - - -When the lengths of the interior closing and opening -delimiter runs are *both* multiples of 3, though, -they can match to create emphasis: - -```````````````````````````````` example -foo***bar***baz -. -

    foobarbaz

    -```````````````````````````````` - -```````````````````````````````` example -foo******bar*********baz -. -

    foobar***baz

    -```````````````````````````````` - - -Indefinite levels of nesting are possible: - -```````````````````````````````` example -*foo **bar *baz* bim** bop* -. -

    foo bar baz bim bop

    -```````````````````````````````` - - -```````````````````````````````` example -*foo [*bar*](/url)* -. -

    foo bar

    -```````````````````````````````` - - -There can be no empty emphasis or strong emphasis: - -```````````````````````````````` example -** is not an empty emphasis -. -

    ** is not an empty emphasis

    -```````````````````````````````` - - -```````````````````````````````` example -**** is not an empty strong emphasis -. -

    **** is not an empty strong emphasis

    -```````````````````````````````` - - - -Rule 10: - -Any nonempty sequence of inline elements can be the contents of a -strongly emphasized span. - -```````````````````````````````` example -**foo [bar](/url)** -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -**foo -bar** -. -

    foo -bar

    -```````````````````````````````` - - -In particular, emphasis and strong emphasis can be nested -inside strong emphasis: - -```````````````````````````````` example -__foo _bar_ baz__ -. -

    foo bar baz

    -```````````````````````````````` - - -```````````````````````````````` example -__foo __bar__ baz__ -. -

    foo bar baz

    -```````````````````````````````` - - -```````````````````````````````` example -____foo__ bar__ -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -**foo **bar**** -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -**foo *bar* baz** -. -

    foo bar baz

    -```````````````````````````````` - - -```````````````````````````````` example -**foo*bar*baz** -. -

    foobarbaz

    -```````````````````````````````` - - -```````````````````````````````` example -***foo* bar** -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -**foo *bar*** -. -

    foo bar

    -```````````````````````````````` - - -Indefinite levels of nesting are possible: - -```````````````````````````````` example -**foo *bar **baz** -bim* bop** -. -

    foo bar baz -bim bop

    -```````````````````````````````` - - -```````````````````````````````` example -**foo [*bar*](/url)** -. -

    foo bar

    -```````````````````````````````` - - -There can be no empty emphasis or strong emphasis: - -```````````````````````````````` example -__ is not an empty emphasis -. -

    __ is not an empty emphasis

    -```````````````````````````````` - - -```````````````````````````````` example -____ is not an empty strong emphasis -. -

    ____ is not an empty strong emphasis

    -```````````````````````````````` - - - -Rule 11: - -```````````````````````````````` example -foo *** -. -

    foo ***

    -```````````````````````````````` - - -```````````````````````````````` example -foo *\** -. -

    foo *

    -```````````````````````````````` - - -```````````````````````````````` example -foo *_* -. -

    foo _

    -```````````````````````````````` - - -```````````````````````````````` example -foo ***** -. -

    foo *****

    -```````````````````````````````` - - -```````````````````````````````` example -foo **\*** -. -

    foo *

    -```````````````````````````````` - - -```````````````````````````````` example -foo **_** -. -

    foo _

    -```````````````````````````````` - - -Note that when delimiters do not match evenly, Rule 11 determines -that the excess literal `*` characters will appear outside of the -emphasis, rather than inside it: - -```````````````````````````````` example -**foo* -. -

    *foo

    -```````````````````````````````` - - -```````````````````````````````` example -*foo** -. -

    foo*

    -```````````````````````````````` - - -```````````````````````````````` example -***foo** -. -

    *foo

    -```````````````````````````````` - - -```````````````````````````````` example -****foo* -. -

    ***foo

    -```````````````````````````````` - - -```````````````````````````````` example -**foo*** -. -

    foo*

    -```````````````````````````````` - - -```````````````````````````````` example -*foo**** -. -

    foo***

    -```````````````````````````````` - - - -Rule 12: - -```````````````````````````````` example -foo ___ -. -

    foo ___

    -```````````````````````````````` - - -```````````````````````````````` example -foo _\__ -. -

    foo _

    -```````````````````````````````` - - -```````````````````````````````` example -foo _*_ -. -

    foo *

    -```````````````````````````````` - - -```````````````````````````````` example -foo _____ -. -

    foo _____

    -```````````````````````````````` - - -```````````````````````````````` example -foo __\___ -. -

    foo _

    -```````````````````````````````` - - -```````````````````````````````` example -foo __*__ -. -

    foo *

    -```````````````````````````````` - - -```````````````````````````````` example -__foo_ -. -

    _foo

    -```````````````````````````````` - - -Note that when delimiters do not match evenly, Rule 12 determines -that the excess literal `_` characters will appear outside of the -emphasis, rather than inside it: - -```````````````````````````````` example -_foo__ -. -

    foo_

    -```````````````````````````````` - - -```````````````````````````````` example -___foo__ -. -

    _foo

    -```````````````````````````````` - - -```````````````````````````````` example -____foo_ -. -

    ___foo

    -```````````````````````````````` - - -```````````````````````````````` example -__foo___ -. -

    foo_

    -```````````````````````````````` - - -```````````````````````````````` example -_foo____ -. -

    foo___

    -```````````````````````````````` - - -Rule 13 implies that if you want emphasis nested directly inside -emphasis, you must use different delimiters: - -```````````````````````````````` example -**foo** -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -*_foo_* -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -__foo__ -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -_*foo*_ -. -

    foo

    -```````````````````````````````` - - -However, strong emphasis within strong emphasis is possible without -switching delimiters: - -```````````````````````````````` example -****foo**** -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -____foo____ -. -

    foo

    -```````````````````````````````` - - - -Rule 13 can be applied to arbitrarily long sequences of -delimiters: - -```````````````````````````````` example -******foo****** -. -

    foo

    -```````````````````````````````` - - -Rule 14: - -```````````````````````````````` example -***foo*** -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -_____foo_____ -. -

    foo

    -```````````````````````````````` - - -Rule 15: - -```````````````````````````````` example -*foo _bar* baz_ -. -

    foo _bar baz_

    -```````````````````````````````` - - -```````````````````````````````` example -*foo __bar *baz bim__ bam* -. -

    foo bar *baz bim bam

    -```````````````````````````````` - - -Rule 16: - -```````````````````````````````` example -**foo **bar baz** -. -

    **foo bar baz

    -```````````````````````````````` - - -```````````````````````````````` example -*foo *bar baz* -. -

    *foo bar baz

    -```````````````````````````````` - - -Rule 17: - -```````````````````````````````` example -*[bar*](/url) -. -

    *bar*

    -```````````````````````````````` - - -```````````````````````````````` example -_foo [bar_](/url) -. -

    _foo bar_

    -```````````````````````````````` - - -```````````````````````````````` example -* -. -

    *

    -```````````````````````````````` - - -```````````````````````````````` example -** -. -

    **

    -```````````````````````````````` - - -```````````````````````````````` example -__ -. -

    __

    -```````````````````````````````` - - -```````````````````````````````` example -*a `*`* -. -

    a *

    -```````````````````````````````` - - -```````````````````````````````` example -_a `_`_ -. -

    a _

    -```````````````````````````````` - - -```````````````````````````````` example -**a<http://foo.bar/?q=**> -. -

    **ahttp://foo.bar/?q=**

    -```````````````````````````````` - - -```````````````````````````````` example -__a<http://foo.bar/?q=__> -. -

    __ahttp://foo.bar/?q=__

    -```````````````````````````````` - - - -## Links - -A link contains [link text] (the visible text), a [link destination] -(the URI that is the link destination), and optionally a [link title]. -There are two basic kinds of links in Markdown. In [inline links] the -destination and title are given immediately after the link text. In -[reference links] the destination and title are defined elsewhere in -the document. - -A [link text](@) consists of a sequence of zero or more -inline elements enclosed by square brackets (`[` and `]`). The -following rules apply: - -- Links may not contain other links, at any level of nesting. If - multiple otherwise valid link definitions appear nested inside each - other, the inner-most definition is used. - -- Brackets are allowed in the [link text] only if (a) they - are backslash-escaped or (b) they appear as a matched pair of brackets, - with an open bracket `[`, a sequence of zero or more inlines, and - a close bracket `]`. - -- Backtick [code spans], [autolinks], and raw [HTML tags] bind more tightly - than the brackets in link text. Thus, for example, - `` [foo`]` `` could not be a link text, since the second `]` - is part of a code span. - -- The brackets in link text bind more tightly than markers for - [emphasis and strong emphasis]. Thus, for example, `*[foo*](url)` is a link. - -A [link destination](@) consists of either - -- a sequence of zero or more characters between an opening `<` and a - closing `>` that contains no line breaks or unescaped - `<` or `>` characters, or - -- a nonempty sequence of characters that does not start with - `<`, does not include ASCII space or control characters, and - includes parentheses only if (a) they are backslash-escaped or - (b) they are part of a balanced pair of unescaped parentheses. - (Implementations may impose limits on parentheses nesting to - avoid performance issues, but at least three levels of nesting - should be supported.) 
- -A [link title](@) consists of either - -- a sequence of zero or more characters between straight double-quote - characters (`"`), including a `"` character only if it is - backslash-escaped, or - -- a sequence of zero or more characters between straight single-quote - characters (`'`), including a `'` character only if it is - backslash-escaped, or - -- a sequence of zero or more characters between matching parentheses - (`(...)`), including a `(` or `)` character only if it is - backslash-escaped. - -Although [link titles] may span multiple lines, they may not contain -a [blank line]. - -An [inline link](@) consists of a [link text] followed immediately -by a left parenthesis `(`, optional [whitespace], an optional -[link destination], an optional [link title] separated from the link -destination by [whitespace], optional [whitespace], and a right -parenthesis `)`. The link's text consists of the inlines contained -in the [link text] (excluding the enclosing square brackets). -The link's URI consists of the link destination, excluding enclosing -`<...>` if present, with backslash-escapes in effect as described -above. The link's title consists of the link title, excluding its -enclosing delimiters, with backslash-escapes in effect as described -above. - -Here is a simple inline link: - -```````````````````````````````` example -[link](/uri "title") -. -

    link

    -```````````````````````````````` - - -The title may be omitted: - -```````````````````````````````` example -[link](/uri) -. -

    link

    -```````````````````````````````` - - -Both the title and the destination may be omitted: - -```````````````````````````````` example -[link]() -. -

    link

    -```````````````````````````````` - - -```````````````````````````````` example -[link](<>) -. -

    link

    -```````````````````````````````` - -The destination can only contain spaces if it is -enclosed in pointy brackets: - -```````````````````````````````` example -[link](/my uri) -. -

    [link](/my uri)

    -```````````````````````````````` - -```````````````````````````````` example -[link](</my uri>) -. -

    link

    -```````````````````````````````` - -The destination cannot contain line breaks, -even if enclosed in pointy brackets: - -```````````````````````````````` example -[link](foo -bar) -. -

    [link](foo -bar)

    -```````````````````````````````` - -```````````````````````````````` example -[link](<foo -bar>) -. -

    [link]()

    -```````````````````````````````` - -The destination can contain `)` if it is enclosed -in pointy brackets: - -```````````````````````````````` example -[a](<b)c>) -. -

    a

    -```````````````````````````````` - -Pointy brackets that enclose links must be unescaped: - -```````````````````````````````` example -[link](\<foo\>) -. -

    [link](<foo>)

    -```````````````````````````````` - -These are not links, because the opening pointy bracket -is not matched properly: - -```````````````````````````````` example -[a](<b)c -[a](<b)c> -[a](<b>c) -. -

    [a](<b)c -[a](<b)c> -[a](c)

    -```````````````````````````````` - -Parentheses inside the link destination may be escaped: - -```````````````````````````````` example -[link](\(foo\)) -. -

    link

    -```````````````````````````````` - -Any number of parentheses are allowed without escaping, as long as they are -balanced: - -```````````````````````````````` example -[link](foo(and(bar))) -. -

    link

    -```````````````````````````````` - -However, if you have unbalanced parentheses, you need to escape or use the -`<...>` form: - -```````````````````````````````` example -[link](foo\(and\(bar\)) -. -

    link

    -```````````````````````````````` - - -```````````````````````````````` example -[link](<foo(and(bar)>) -. -

    link

    -```````````````````````````````` - - -Parentheses and other symbols can also be escaped, as usual -in Markdown: - -```````````````````````````````` example -[link](foo\)\:) -. -

    link

    -```````````````````````````````` - - -A link can contain fragment identifiers and queries: - -```````````````````````````````` example -[link](#fragment) - -[link](http://example.com#fragment) - -[link](http://example.com?foo=3#frag) -. -

    link

    -

    link

    -

    link

    -```````````````````````````````` - - -Note that a backslash before a non-escapable character is -just a backslash: - -```````````````````````````````` example -[link](foo\bar) -. -

    link

    -```````````````````````````````` - - -URL-escaping should be left alone inside the destination, as all -URL-escaped characters are also valid URL characters. Entity and -numerical character references in the destination will be parsed -into the corresponding Unicode code points, as usual. These may -be optionally URL-escaped when written as HTML, but this spec -does not enforce any particular policy for rendering URLs in -HTML or other formats. Renderers may make different decisions -about how to escape or normalize URLs in the output. - -```````````````````````````````` example -[link](foo%20b&auml;) -. -

    link

    -```````````````````````````````` - - -Note that, because titles can often be parsed as destinations, -if you try to omit the destination and keep the title, you'll -get unexpected results: - -```````````````````````````````` example -[link]("title") -. -

    link

    -```````````````````````````````` - - -Titles may be in single quotes, double quotes, or parentheses: - -```````````````````````````````` example -[link](/url "title") -[link](/url 'title') -[link](/url (title)) -. -

    link -link -link

    -```````````````````````````````` - - -Backslash escapes and entity and numeric character references -may be used in titles: - -```````````````````````````````` example -[link](/url "title \"&quot;") -. -

    link

    -```````````````````````````````` - - -Titles must be separated from the link using a [whitespace]. -Other [Unicode whitespace] like non-breaking space doesn't work. - -```````````````````````````````` example -[link](/url "title") -. -

    link

    -```````````````````````````````` - - -Nested balanced quotes are not allowed without escaping: - -```````````````````````````````` example -[link](/url "title "and" title") -. -

    [link](/url "title "and" title")

    -```````````````````````````````` - - -But it is easy to work around this by using a different quote type: - -```````````````````````````````` example -[link](/url 'title "and" title') -. -

    link

    -```````````````````````````````` - - -(Note: `Markdown.pl` did allow double quotes inside a double-quoted -title, and its test suite included a test demonstrating this. -But it is hard to see a good rationale for the extra complexity this -brings, since there are already many ways---backslash escaping, -entity and numeric character references, or using a different -quote type for the enclosing title---to write titles containing -double quotes. `Markdown.pl`'s handling of titles has a number -of other strange features. For example, it allows single-quoted -titles in inline links, but not reference links. And, in -reference links but not inline links, it allows a title to begin -with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows -titles with no closing quotation mark, though 1.0.2b8 does not. -It seems preferable to adopt a simple, rational rule that works -the same way in inline links and link reference definitions.) - -[Whitespace] is allowed around the destination and title: - -```````````````````````````````` example -[link]( /uri - "title" ) -. -

    link

    -```````````````````````````````` - - -But it is not allowed between the link text and the -following parenthesis: - -```````````````````````````````` example -[link] (/uri) -. -

    [link] (/uri)

    -```````````````````````````````` - - -The link text may contain balanced brackets, but not unbalanced ones, -unless they are escaped: - -```````````````````````````````` example -[link [foo [bar]]](/uri) -. -

    link [foo [bar]]

    -```````````````````````````````` - - -```````````````````````````````` example -[link] bar](/uri) -. -

    [link] bar](/uri)

    -```````````````````````````````` - - -```````````````````````````````` example -[link [bar](/uri) -. -

    [link bar

    -```````````````````````````````` - - -```````````````````````````````` example -[link \[bar](/uri) -. -

    link [bar

    -```````````````````````````````` - - -The link text may contain inline content: - -```````````````````````````````` example -[link *foo **bar** `#`*](/uri) -. -

    link foo bar #

    -```````````````````````````````` - - -```````````````````````````````` example -[![moon](moon.jpg)](/uri) -. -

    moon

    -```````````````````````````````` - - -However, links may not contain other links, at any level of nesting. - -```````````````````````````````` example -[foo [bar](/uri)](/uri) -. -

    [foo bar](/uri)

    -```````````````````````````````` - - -```````````````````````````````` example -[foo *[bar [baz](/uri)](/uri)*](/uri) -. -

    [foo [bar baz](/uri)](/uri)

    -```````````````````````````````` - - -```````````````````````````````` example -![[[foo](uri1)](uri2)](uri3) -. -

    [foo](uri2)

    -```````````````````````````````` - - -These cases illustrate the precedence of link text grouping over -emphasis grouping: - -```````````````````````````````` example -*[foo*](/uri) -. -

    *foo*

    -```````````````````````````````` - - -```````````````````````````````` example -[foo *bar](baz*) -. -

    foo *bar

    -```````````````````````````````` - - -Note that brackets that *aren't* part of links do not take -precedence: - -```````````````````````````````` example -*foo [bar* baz] -. -

    foo [bar baz]

    -```````````````````````````````` - - -These cases illustrate the precedence of HTML tags, code spans, -and autolinks over link grouping: - -```````````````````````````````` example -[foo <bar attr="](baz)"> -. -

    [foo

    -```````````````````````````````` - - -```````````````````````````````` example -[foo`](/uri)` -. -

    [foo](/uri)

    -```````````````````````````````` - - -```````````````````````````````` example -[foo<http://example.com/?search=](uri)> -. -

    [foohttp://example.com/?search=](uri)

    -```````````````````````````````` - - -There are three kinds of [reference link](@)s: -[full](#full-reference-link), [collapsed](#collapsed-reference-link), -and [shortcut](#shortcut-reference-link). - -A [full reference link](@) -consists of a [link text] immediately followed by a [link label] -that [matches] a [link reference definition] elsewhere in the document. - -A [link label](@) begins with a left bracket (`[`) and ends -with the first right bracket (`]`) that is not backslash-escaped. -Between these brackets there must be at least one [non-whitespace character]. -Unescaped square bracket characters are not allowed inside the -opening and closing square brackets of [link labels]. A link -label can have at most 999 characters inside the square -brackets. - -One label [matches](@) -another just in case their normalized forms are equal. To normalize a -label, strip off the opening and closing brackets, -perform the *Unicode case fold*, strip leading and trailing -[whitespace] and collapse consecutive internal -[whitespace] to a single space. If there are multiple -matching reference link definitions, the one that comes first in the -document is used. (It is desirable in such cases to emit a warning.) - -The contents of the first link label are parsed as inlines, which are -used as the link's text. The link's URI and title are provided by the -matching [link reference definition]. - -Here is a simple example: - -```````````````````````````````` example -[foo][bar] - -[bar]: /url "title" -. -

    foo

    -```````````````````````````````` - - -The rules for the [link text] are the same as with -[inline links]. Thus: - -The link text may contain balanced brackets, but not unbalanced ones, -unless they are escaped: - -```````````````````````````````` example -[link [foo [bar]]][ref] - -[ref]: /uri -. -

    link [foo [bar]]

    -```````````````````````````````` - - -```````````````````````````````` example -[link \[bar][ref] - -[ref]: /uri -. -

    link [bar

    -```````````````````````````````` - - -The link text may contain inline content: - -```````````````````````````````` example -[link *foo **bar** `#`*][ref] - -[ref]: /uri -. -

    link foo bar #

    -```````````````````````````````` - - -```````````````````````````````` example -[![moon](moon.jpg)][ref] - -[ref]: /uri -. -

    moon

    -```````````````````````````````` - - -However, links may not contain other links, at any level of nesting. - -```````````````````````````````` example -[foo [bar](/uri)][ref] - -[ref]: /uri -. -

    [foo bar]ref

    -```````````````````````````````` - - -```````````````````````````````` example -[foo *bar [baz][ref]*][ref] - -[ref]: /uri -. -

    [foo bar baz]ref

    -```````````````````````````````` - - -(In the examples above, we have two [shortcut reference links] -instead of one [full reference link].) - -The following cases illustrate the precedence of link text grouping over -emphasis grouping: - -```````````````````````````````` example -*[foo*][ref] - -[ref]: /uri -. -

    *foo*

    -```````````````````````````````` - - -```````````````````````````````` example -[foo *bar][ref] - -[ref]: /uri -. -

    foo *bar

    -```````````````````````````````` - - -These cases illustrate the precedence of HTML tags, code spans, -and autolinks over link grouping: - -```````````````````````````````` example -[foo <bar attr="][ref]"> - -[ref]: /uri -. -

    [foo

    -```````````````````````````````` - - -```````````````````````````````` example -[foo`][ref]` - -[ref]: /uri -. -

    [foo][ref]

    -```````````````````````````````` - - -```````````````````````````````` example -[foo<http://example.com/?search=][ref]> - -[ref]: /uri -. -

    [foohttp://example.com/?search=][ref]

    -```````````````````````````````` - - -Matching is case-insensitive: - -```````````````````````````````` example -[foo][BaR] - -[bar]: /url "title" -. -

    foo

    -```````````````````````````````` - - -Unicode case fold is used: - -```````````````````````````````` example -[Толпой][Толпой] is a Russian word. - -[ТОЛПОЙ]: /url -. -

    Толпой is a Russian word.

    -```````````````````````````````` - - -Consecutive internal [whitespace] is treated as one space for -purposes of determining matching: - -```````````````````````````````` example -[Foo - bar]: /url - -[Baz][Foo bar] -. -

    Baz

    -```````````````````````````````` - - -No [whitespace] is allowed between the [link text] and the -[link label]: - -```````````````````````````````` example -[foo] [bar] - -[bar]: /url "title" -. -

    [foo] bar

    -```````````````````````````````` - - -```````````````````````````````` example -[foo] -[bar] - -[bar]: /url "title" -. -

    [foo] -bar

    -```````````````````````````````` - - -This is a departure from John Gruber's original Markdown syntax -description, which explicitly allows whitespace between the link -text and the link label. It brings reference links in line with -[inline links], which (according to both original Markdown and -this spec) cannot have whitespace after the link text. More -importantly, it prevents inadvertent capture of consecutive -[shortcut reference links]. If whitespace is allowed between the -link text and the link label, then in the following we will have -a single reference link, not two shortcut reference links, as -intended: - -``` markdown -[foo] -[bar] - -[foo]: /url1 -[bar]: /url2 -``` - -(Note that [shortcut reference links] were introduced by Gruber -himself in a beta version of `Markdown.pl`, but never included -in the official syntax description. Without shortcut reference -links, it is harmless to allow space between the link text and -link label; but once shortcut references are introduced, it is -too dangerous to allow this, as it frequently leads to -unintended results.) - -When there are multiple matching [link reference definitions], -the first is used: - -```````````````````````````````` example -[foo]: /url1 - -[foo]: /url2 - -[bar][foo] -. -

    bar

    -```````````````````````````````` - - -Note that matching is performed on normalized strings, not parsed -inline content. So the following does not match, even though the -labels define equivalent inline content: - -```````````````````````````````` example -[bar][foo\!] - -[foo!]: /url -. -

    [bar][foo!]

    -```````````````````````````````` - - -[Link labels] cannot contain brackets, unless they are -backslash-escaped: - -```````````````````````````````` example -[foo][ref[] - -[ref[]: /uri -. -

    [foo][ref[]

    -

    [ref[]: /uri

    -```````````````````````````````` - - -```````````````````````````````` example -[foo][ref[bar]] - -[ref[bar]]: /uri -. -

    [foo][ref[bar]]

    -

    [ref[bar]]: /uri

    -```````````````````````````````` - - -```````````````````````````````` example -[[[foo]]] - -[[[foo]]]: /url -. -

    [[[foo]]]

    -

    [[[foo]]]: /url

    -```````````````````````````````` - - -```````````````````````````````` example -[foo][ref\[] - -[ref\[]: /uri -. -

    foo

    -```````````````````````````````` - - -Note that in this example `]` is not backslash-escaped: - -```````````````````````````````` example -[bar\\]: /uri - -[bar\\] -. -

    bar\

    -```````````````````````````````` - - -A [link label] must contain at least one [non-whitespace character]: - -```````````````````````````````` example -[] - -[]: /uri -. -

    []

    -

    []: /uri

    -```````````````````````````````` - - -```````````````````````````````` example -[ - ] - -[ - ]: /uri -. -

    [ -]

    -

    [ -]: /uri

    -```````````````````````````````` - - -A [collapsed reference link](@) -consists of a [link label] that [matches] a -[link reference definition] elsewhere in the -document, followed by the string `[]`. -The contents of the first link label are parsed as inlines, -which are used as the link's text. The link's URI and title are -provided by the matching reference link definition. Thus, -`[foo][]` is equivalent to `[foo][foo]`. - -```````````````````````````````` example -[foo][] - -[foo]: /url "title" -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -[*foo* bar][] - -[*foo* bar]: /url "title" -. -

    foo bar

    -```````````````````````````````` - - -The link labels are case-insensitive: - -```````````````````````````````` example -[Foo][] - -[foo]: /url "title" -. -

    Foo

    -```````````````````````````````` - - - -As with full reference links, [whitespace] is not -allowed between the two sets of brackets: - -```````````````````````````````` example -[foo] -[] - -[foo]: /url "title" -. -

    foo -[]

    -```````````````````````````````` - - -A [shortcut reference link](@) -consists of a [link label] that [matches] a -[link reference definition] elsewhere in the -document and is not followed by `[]` or a link label. -The contents of the first link label are parsed as inlines, -which are used as the link's text. The link's URI and title -are provided by the matching link reference definition. -Thus, `[foo]` is equivalent to `[foo][]`. - -```````````````````````````````` example -[foo] - -[foo]: /url "title" -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -[*foo* bar] - -[*foo* bar]: /url "title" -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -[[*foo* bar]] - -[*foo* bar]: /url "title" -. -

    [foo bar]

    -```````````````````````````````` - - -```````````````````````````````` example -[[bar [foo] - -[foo]: /url -. -

    [[bar foo

    -```````````````````````````````` - - -The link labels are case-insensitive: - -```````````````````````````````` example -[Foo] - -[foo]: /url "title" -. -

    Foo

    -```````````````````````````````` - - -A space after the link text should be preserved: - -```````````````````````````````` example -[foo] bar - -[foo]: /url -. -

    foo bar

    -```````````````````````````````` - - -If you just want bracketed text, you can backslash-escape the -opening bracket to avoid links: - -```````````````````````````````` example -\[foo] - -[foo]: /url "title" -. -

    [foo]

    -```````````````````````````````` - - -Note that this is a link, because a link label ends with the first -following closing bracket: - -```````````````````````````````` example -[foo*]: /url - -*[foo*] -. -

    *foo*

    -```````````````````````````````` - - -Full and compact references take precedence over shortcut -references: - -```````````````````````````````` example -[foo][bar] - -[foo]: /url1 -[bar]: /url2 -. -

    foo

    -```````````````````````````````` - -```````````````````````````````` example -[foo][] - -[foo]: /url1 -. -

    foo

    -```````````````````````````````` - -Inline links also take precedence: - -```````````````````````````````` example -[foo]() - -[foo]: /url1 -. -

    foo

    -```````````````````````````````` - -```````````````````````````````` example -[foo](not a link) - -[foo]: /url1 -. -

    foo(not a link)

    -```````````````````````````````` - -In the following case `[bar][baz]` is parsed as a reference, -`[foo]` as normal text: - -```````````````````````````````` example -[foo][bar][baz] - -[baz]: /url -. -

    [foo]bar

    -```````````````````````````````` - - -Here, though, `[foo][bar]` is parsed as a reference, since -`[bar]` is defined: - -```````````````````````````````` example -[foo][bar][baz] - -[baz]: /url1 -[bar]: /url2 -. -

    foobaz

    -```````````````````````````````` - - -Here `[foo]` is not parsed as a shortcut reference, because it -is followed by a link label (even though `[bar]` is not defined): - -```````````````````````````````` example -[foo][bar][baz] - -[baz]: /url1 -[foo]: /url2 -. -

    [foo]bar

    -```````````````````````````````` - - - -## Images - -Syntax for images is like the syntax for links, with one -difference. Instead of [link text], we have an -[image description](@). The rules for this are the -same as for [link text], except that (a) an -image description starts with `![` rather than `[`, and -(b) an image description may contain links. -An image description has inline elements -as its contents. When an image is rendered to HTML, -this is standardly used as the image's `alt` attribute. - -```````````````````````````````` example -![foo](/url "title") -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -![foo *bar*] - -[foo *bar*]: train.jpg "train & tracks" -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -![foo ![bar](/url)](/url2) -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -![foo [bar](/url)](/url2) -. -

    foo bar

    -```````````````````````````````` - - -Though this spec is concerned with parsing, not rendering, it is -recommended that in rendering to HTML, only the plain string content -of the [image description] be used. Note that in -the above example, the alt attribute's value is `foo bar`, not `foo -[bar](/url)` or `foo <a href="/url">bar</a>`. Only the plain string -content is rendered, without formatting. - -```````````````````````````````` example -![foo *bar*][] - -[foo *bar*]: train.jpg "train & tracks" -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -![foo *bar*][foobar] - -[FOOBAR]: train.jpg "train & tracks" -. -

    foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -![foo](train.jpg) -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -My ![foo bar](/path/to/train.jpg "title" ) -. -

    My foo bar

    -```````````````````````````````` - - -```````````````````````````````` example -![foo]() -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -![](/url) -. -

    -```````````````````````````````` - - -Reference-style: - -```````````````````````````````` example -![foo][bar] - -[bar]: /url -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -![foo][bar] - -[BAR]: /url -. -

    foo

    -```````````````````````````````` - - -Collapsed: - -```````````````````````````````` example -![foo][] - -[foo]: /url "title" -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -![*foo* bar][] - -[*foo* bar]: /url "title" -. -

    foo bar

    -```````````````````````````````` - - -The labels are case-insensitive: - -```````````````````````````````` example -![Foo][] - -[foo]: /url "title" -. -

    Foo

    -```````````````````````````````` - - -As with reference links, [whitespace] is not allowed -between the two sets of brackets: - -```````````````````````````````` example -![foo] -[] - -[foo]: /url "title" -. -

    foo -[]

    -```````````````````````````````` - - -Shortcut: - -```````````````````````````````` example -![foo] - -[foo]: /url "title" -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -![*foo* bar] - -[*foo* bar]: /url "title" -. -

    foo bar

    -```````````````````````````````` - - -Note that link labels cannot contain unescaped brackets: - -```````````````````````````````` example -![[foo]] - -[[foo]]: /url "title" -. -

    ![[foo]]

    -

    [[foo]]: /url "title"

    -```````````````````````````````` - - -The link labels are case-insensitive: - -```````````````````````````````` example -![Foo] - -[foo]: /url "title" -. -

    Foo

    -```````````````````````````````` - - -If you just want a literal `!` followed by bracketed text, you can -backslash-escape the opening `[`: - -```````````````````````````````` example -!\[foo] - -[foo]: /url "title" -. -

    ![foo]

    -```````````````````````````````` - - -If you want a link after a literal `!`, backslash-escape the -`!`: - -```````````````````````````````` example -\![foo] - -[foo]: /url "title" -. -

    !foo

    -```````````````````````````````` - - -## Autolinks - -[Autolink](@)s are absolute URIs and email addresses inside -`<` and `>`. They are parsed as links, with the URL or email address -as the link label. - -A [URI autolink](@) consists of `<`, followed by an -[absolute URI] followed by `>`. It is parsed as -a link to the URI, with the URI as the link's label. - -An [absolute URI](@), -for these purposes, consists of a [scheme] followed by a colon (`:`) -followed by zero or more characters other than ASCII -[whitespace] and control characters, `<`, and `>`. If -the URI includes these characters, they must be percent-encoded -(e.g. `%20` for a space). - -For purposes of this spec, a [scheme](@) is any sequence -of 2--32 characters beginning with an ASCII letter and followed -by any combination of ASCII letters, digits, or the symbols plus -("+"), period ("."), or hyphen ("-"). - -Here are some valid autolinks: - -```````````````````````````````` example - -. -

    http://foo.bar.baz

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    http://foo.bar.baz/test?q=hello&id=22&boolean

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    irc://foo.bar:2233/baz

    -```````````````````````````````` - - -Uppercase is also fine: - -```````````````````````````````` example - -. -

    MAILTO:FOO@BAR.BAZ

    -```````````````````````````````` - - -Note that many strings that count as [absolute URIs] for -purposes of this spec are not valid URIs, because their -schemes are not registered or because of other problems -with their syntax: - -```````````````````````````````` example - -. -

    a+b+c:d

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    made-up-scheme://foo,bar

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    http://../

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    localhost:5001/foo

    -```````````````````````````````` - - -Spaces are not allowed in autolinks: - -```````````````````````````````` example - -. -

    <http://foo.bar/baz bim>

    -```````````````````````````````` - - -Backslash-escapes do not work inside autolinks: - -```````````````````````````````` example - -. -

    http://example.com/\[\

    -```````````````````````````````` - - -An [email autolink](@) -consists of `<`, followed by an [email address], -followed by `>`. The link's label is the email address, -and the URL is `mailto:` followed by the email address. - -An [email address](@), -for these purposes, is anything that matches -the [non-normative regex from the HTML5 -spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)): - - /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? - (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ - -Examples of email autolinks: - -```````````````````````````````` example - -. -

    foo@bar.example.com

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    foo+special@Bar.baz-bar0.com

    -```````````````````````````````` - - -Backslash-escapes do not work inside email autolinks: - -```````````````````````````````` example - -. -

    <foo+@bar.example.com>

    -```````````````````````````````` - - -These are not autolinks: - -```````````````````````````````` example -<> -. -

    <>

    -```````````````````````````````` - - -```````````````````````````````` example -< http://foo.bar > -. -

    < http://foo.bar >

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    <m:abc>

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    <foo.bar.baz>

    -```````````````````````````````` - - -```````````````````````````````` example -http://example.com -. -

    http://example.com

    -```````````````````````````````` - - -```````````````````````````````` example -foo@bar.example.com -. -

    foo@bar.example.com

    -```````````````````````````````` - - -## Raw HTML - -Text between `<` and `>` that looks like an HTML tag is parsed as a -raw HTML tag and will be rendered in HTML without escaping. -Tag and attribute names are not limited to current HTML tags, -so custom tags (and even, say, DocBook tags) may be used. - -Here is the grammar for tags: - -A [tag name](@) consists of an ASCII letter -followed by zero or more ASCII letters, digits, or -hyphens (`-`). - -An [attribute](@) consists of [whitespace], -an [attribute name], and an optional -[attribute value specification]. - -An [attribute name](@) -consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII -letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML -specification restricted to ASCII. HTML5 is laxer.) - -An [attribute value specification](@) -consists of optional [whitespace], -a `=` character, optional [whitespace], and an [attribute -value]. - -An [attribute value](@) -consists of an [unquoted attribute value], -a [single-quoted attribute value], or a [double-quoted attribute value]. - -An [unquoted attribute value](@) -is a nonempty string of characters not -including [whitespace], `"`, `'`, `=`, `<`, `>`, or `` ` ``. - -A [single-quoted attribute value](@) -consists of `'`, zero or more -characters not including `'`, and a final `'`. - -A [double-quoted attribute value](@) -consists of `"`, zero or more -characters not including `"`, and a final `"`. - -An [open tag](@) consists of a `<` character, a [tag name], -zero or more [attributes], optional [whitespace], an optional `/` -character, and a `>` character. - -A [closing tag](@) consists of the string `</`, a -[tag name], optional [whitespace], and the character `>`. - -An [HTML comment](@) consists of `<!--` + *text* + `-->`, -where *text* does not start with `>` or `->`, does not end with `-`, -and does not contain `--`. (See the -[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) - -A [processing instruction](@) -consists of the string `<?`, a string -of characters not including the string `?>`, and the string -`?>`. 
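- -A [declaration](@) consists of the -string `<!`, a name consisting of one or more uppercase ASCII letters, -[whitespace], a string of characters not including the -character `>`, and the character `>`. - -A [CDATA section](@) consists of -the string `<![CDATA[`, a string of characters that does not include -the string `]]>`, and the string `]]>`. - -An [HTML tag](@) consists of an [open tag], a [closing tag], -an [HTML comment], a [processing instruction], a [declaration], -or a [CDATA section]. - -Here are some simple open tags: - -```````````````````````````````` example - -. -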

    -```````````````````````````````` - - -Empty elements: - -```````````````````````````````` example - -. -

    -```````````````````````````````` - - -[Whitespace] is allowed: - -```````````````````````````````` example - -. -

    -```````````````````````````````` - - -With attributes: - -```````````````````````````````` example - -. -

    -```````````````````````````````` - - -Custom tag names can be used: - -```````````````````````````````` example -Foo -. -

    Foo

    -```````````````````````````````` - - -Illegal tag names, not parsed as HTML: - -```````````````````````````````` example -<33> <__> -. -

    <33> <__>

    -```````````````````````````````` - - -Illegal attribute names: - -```````````````````````````````` example -
    -. -

    <a h*#ref="hi">

    -```````````````````````````````` - - -Illegal attribute values: - -```````````````````````````````` example -
    -. -

    </a href="foo">

    -```````````````````````````````` - - -Comments: - -```````````````````````````````` example -foo -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -foo -. -

    foo <!-- not a comment -- two hyphens -->

    -```````````````````````````````` - - -Not comments: - -```````````````````````````````` example -foo foo --> - -foo -. -

    foo <!--> foo -->

    -

    foo <!-- foo--->

    -```````````````````````````````` - - -Processing instructions: - -```````````````````````````````` example -foo -. -

    foo

    -```````````````````````````````` - - -Declarations: - -```````````````````````````````` example -foo -. -

    foo

    -```````````````````````````````` - - -CDATA sections: - -```````````````````````````````` example -foo &<]]> -. -

    foo &<]]>

    -```````````````````````````````` - - -Entity and numeric character references are preserved in HTML -attributes: - -```````````````````````````````` example -foo
    -. -

    foo

    -```````````````````````````````` - - -Backslash escapes do not work in HTML attributes: - -```````````````````````````````` example -foo -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    <a href=""">

    -```````````````````````````````` - - -## Hard line breaks - -A line break (not in a code span or HTML tag) that is preceded -by two or more spaces and does not occur at the end of a block -is parsed as a [hard line break](@) (rendered -in HTML as a `<br />` tag): - -```````````````````````````````` example -foo -baz -. -

    foo
    -baz

    -```````````````````````````````` - - -For a more visible alternative, a backslash before the -[line ending] may be used instead of two spaces: - -```````````````````````````````` example -foo\ -baz -. -

    foo
    -baz

    -```````````````````````````````` - - -More than two spaces can be used: - -```````````````````````````````` example -foo -baz -. -

    foo
    -baz

    -```````````````````````````````` - - -Leading spaces at the beginning of the next line are ignored: - -```````````````````````````````` example -foo - bar -. -

    foo
    -bar

    -```````````````````````````````` - - -```````````````````````````````` example -foo\ - bar -. -

    foo
    -bar

    -```````````````````````````````` - - -Line breaks can occur inside emphasis, links, and other constructs -that allow inline content: - -```````````````````````````````` example -*foo -bar* -. -

    foo
    -bar

    -```````````````````````````````` - - -```````````````````````````````` example -*foo\ -bar* -. -

    foo
    -bar

    -```````````````````````````````` - - -Line breaks do not occur inside code spans - -```````````````````````````````` example -`code -span` -. -

    code span

    -```````````````````````````````` - - -```````````````````````````````` example -`code\ -span` -. -

    code\ span

    -```````````````````````````````` - - -or HTML tags: - -```````````````````````````````` example -
    -. -

    -```````````````````````````````` - - -```````````````````````````````` example - -. -

    -```````````````````````````````` - - -Hard line breaks are for separating inline content within a block. -Neither syntax for hard line breaks works at the end of a paragraph or -other block element: - -```````````````````````````````` example -foo\ -. -

    foo\

    -```````````````````````````````` - - -```````````````````````````````` example -foo -. -

    foo

    -```````````````````````````````` - - -```````````````````````````````` example -### foo\ -. -

    foo\

    -```````````````````````````````` - - -```````````````````````````````` example -### foo -. -

    foo

    -```````````````````````````````` - - -## Soft line breaks - -A regular line break (not in a code span or HTML tag) that is not -preceded by two or more spaces or a backslash is parsed as a -[softbreak](@). (A softbreak may be rendered in HTML either as a -[line ending] or as a space. The result will be the same in -browsers. In the examples here, a [line ending] will be used.) - -```````````````````````````````` example -foo -baz -. -

    foo -baz

    -```````````````````````````````` - - -Spaces at the end of the line and beginning of the next line are -removed: - -```````````````````````````````` example -foo - baz -. -

    foo -baz

    -```````````````````````````````` - - -A conforming parser may render a soft line break in HTML either as a -line break or as a space. - -A renderer may also provide an option to render soft line breaks -as hard line breaks. - -## Textual content - -Any characters not given an interpretation by the above rules will -be parsed as plain textual content. - -```````````````````````````````` example -hello $.;'there -. -

    hello $.;'there

    -```````````````````````````````` - - -```````````````````````````````` example -Foo χρῆν -. -

    Foo χρῆν

    -```````````````````````````````` - - -Internal spaces are preserved verbatim: - -```````````````````````````````` example -Multiple spaces -. -

    Multiple spaces

    -```````````````````````````````` - - - - -# Appendix: A parsing strategy - -In this appendix we describe some features of the parsing strategy -used in the CommonMark reference implementations. - -## Overview - -Parsing has two phases: - -1. In the first phase, lines of input are consumed and the block -structure of the document---its division into paragraphs, block quotes, -list items, and so on---is constructed. Text is assigned to these -blocks but not parsed. Link reference definitions are parsed and a -map of links is constructed. - -2. In the second phase, the raw text contents of paragraphs and headings -are parsed into sequences of Markdown inline elements (strings, -code spans, links, emphasis, and so on), using the map of link -references constructed in phase 1. - -At each point in processing, the document is represented as a tree of -**blocks**. The root of the tree is a `document` block. The `document` -may have any number of other blocks as **children**. These children -may, in turn, have other blocks as children. The last child of a block -is normally considered **open**, meaning that subsequent lines of input -can alter its contents. (Blocks that are not open are **closed**.) -Here, for example, is a possible document tree, with the open blocks -marked by arrows: - -``` tree --> document - -> block_quote - paragraph - "Lorem ipsum dolor\nsit amet." - -> list (type=bullet tight=true bullet_char=-) - list_item - paragraph - "Qui *quodsi iracundia*" - -> list_item - -> paragraph - "aliquando id" -``` - -## Phase 1: block structure - -Each line that is processed has an effect on this tree. The line is -analyzed and, depending on its contents, the document may be altered -in one or more of the following ways: - -1. One or more open blocks may be closed. -2. One or more new blocks may be created as children of the - last open block. -3. Text may be added to the last (deepest) open block remaining - on the tree. 
- -Once a line has been incorporated into the tree in this way, -it can be discarded, so input can be read in a stream. - -For each line, we follow this procedure: - -1. First we iterate through the open blocks, starting with the -root document, and descending through last children down to the last -open block. Each block imposes a condition that the line must satisfy -if the block is to remain open. For example, a block quote requires a -`>` character. A paragraph requires a non-blank line. -In this phase we may match all or just some of the open -blocks. But we cannot close unmatched blocks yet, because we may have a -[lazy continuation line]. - -2. Next, after consuming the continuation markers for existing -blocks, we look for new block starts (e.g. `>` for a block quote). -If we encounter a new block start, we close any blocks unmatched -in step 1 before creating the new block as a child of the last -matched block. - -3. Finally, we look at the remainder of the line (after block -markers like `>`, list markers, and indentation have been consumed). -This is text that can be incorporated into the last open -block (a paragraph, code block, heading, or raw HTML). - -Setext headings are formed when we see a line of a paragraph -that is a [setext heading underline]. - -Reference link definitions are detected when a paragraph is closed; -the accumulated text lines are parsed to see if they begin with -one or more reference link definitions. Any remainder becomes a -normal paragraph. - -We can see how this works by considering how the tree above is -generated by four lines of Markdown: - -``` markdown -> Lorem ipsum dolor -sit amet. 
-> - Qui *quodsi iracundia* -> - aliquando id -``` - -At the outset, our document model is just - -``` tree --> document -``` - -The first line of our text, - -``` markdown -> Lorem ipsum dolor -``` - -causes a `block_quote` block to be created as a child of our -open `document` block, and a `paragraph` block as a child of -the `block_quote`. Then the text is added to the last open -block, the `paragraph`: - -``` tree --> document - -> block_quote - -> paragraph - "Lorem ipsum dolor" -``` - -The next line, - -``` markdown -sit amet. -``` - -is a "lazy continuation" of the open `paragraph`, so it gets added -to the paragraph's text: - -``` tree --> document - -> block_quote - -> paragraph - "Lorem ipsum dolor\nsit amet." -``` - -The third line, - -``` markdown -> - Qui *quodsi iracundia* -``` - -causes the `paragraph` block to be closed, and a new `list` block -opened as a child of the `block_quote`. A `list_item` is also -added as a child of the `list`, and a `paragraph` as a child of -the `list_item`. The text is then added to the new `paragraph`: - -``` tree --> document - -> block_quote - paragraph - "Lorem ipsum dolor\nsit amet." - -> list (type=bullet tight=true bullet_char=-) - -> list_item - -> paragraph - "Qui *quodsi iracundia*" -``` - -The fourth line, - -``` markdown -> - aliquando id -``` - -causes the `list_item` (and its child the `paragraph`) to be closed, -and a new `list_item` opened up as child of the `list`. A `paragraph` -is added as a child of the new `list_item`, to contain the text. -We thus obtain the final tree: - -``` tree --> document - -> block_quote - paragraph - "Lorem ipsum dolor\nsit amet." - -> list (type=bullet tight=true bullet_char=-) - list_item - paragraph - "Qui *quodsi iracundia*" - -> list_item - -> paragraph - "aliquando id" -``` - -## Phase 2: inline structure - -Once all of the input has been parsed, all open blocks are closed. 
- -We then "walk the tree," visiting every node, and parse raw -string contents of paragraphs and headings as inlines. At this -point we have seen all the link reference definitions, so we can -resolve reference links as we go. - -``` tree -document - block_quote - paragraph - str "Lorem ipsum dolor" - softbreak - str "sit amet." - list (type=bullet tight=true bullet_char=-) - list_item - paragraph - str "Qui " - emph - str "quodsi iracundia" - list_item - paragraph - str "aliquando id" -``` - -Notice how the [line ending] in the first paragraph has -been parsed as a `softbreak`, and the asterisks in the first list item -have become an `emph`. - -### An algorithm for parsing nested emphasis and links - -By far the trickiest part of inline parsing is handling emphasis, -strong emphasis, links, and images. This is done using the following -algorithm. - -When we're parsing inlines and we hit either - -- a run of `*` or `_` characters, or -- a `[` or `![` - -we insert a text node with these symbols as its literal content, and we -add a pointer to this text node to the [delimiter stack](@). - -The [delimiter stack] is a doubly linked list. Each -element contains a pointer to a text node, plus information about - -- the type of delimiter (`[`, `![`, `*`, `_`) -- the number of delimiters, -- whether the delimiter is "active" (all are active to start), and -- whether the delimiter is a potential opener, a potential closer, - or both (which depends on what sort of characters precede - and follow the delimiters). - -When we hit a `]` character, we call the *look for link or image* -procedure (see below). - -When we hit the end of the input, we call the *process emphasis* -procedure (see below), with `stack_bottom` = NULL. - -#### *look for link or image* - -Starting at the top of the delimiter stack, we look backwards -through the stack for an opening `[` or `![` delimiter. - -- If we don't find one, we return a literal text node `]`. 
- -- If we do find one, but it's not *active*, we remove the inactive - delimiter from the stack, and return a literal text node `]`. - -- If we find one and it's active, then we parse ahead to see if - we have an inline link/image, reference link/image, compact reference - link/image, or shortcut reference link/image. - - + If we don't, then we remove the opening delimiter from the - delimiter stack and return a literal text node `]`. - - + If we do, then - - * We return a link or image node whose children are the inlines - after the text node pointed to by the opening delimiter. - - * We run *process emphasis* on these inlines, with the `[` opener - as `stack_bottom`. - - * We remove the opening delimiter. - - * If we have a link (and not an image), we also set all - `[` delimiters before the opening delimiter to *inactive*. (This - will prevent us from getting links within links.) - -#### *process emphasis* - -Parameter `stack_bottom` sets a lower bound to how far we -descend in the [delimiter stack]. If it is NULL, we can -go all the way to the bottom. Otherwise, we stop before -visiting `stack_bottom`. - -Let `current_position` point to the element on the [delimiter stack] -just above `stack_bottom` (or the first element if `stack_bottom` -is NULL). - -We keep track of the `openers_bottom` for each delimiter -type (`*`, `_`) and each length of the closing delimiter run -(modulo 3). Initialize this to `stack_bottom`. - -Then we repeat the following until we run out of potential -closers: - -- Move `current_position` forward in the delimiter stack (if needed) - until we find the first potential closer with delimiter `*` or `_`. - (This will be the potential closer closest - to the beginning of the input -- the first one in parse order.) - -- Now, look back in the stack (staying above `stack_bottom` and - the `openers_bottom` for this delimiter type) for the - first matching potential opener ("matching" means same delimiter). 
- -- If one is found: - - + Figure out whether we have emphasis or strong emphasis: - if both closer and opener spans have length >= 2, we have - strong, otherwise regular. - - + Insert an emph or strong emph node accordingly, after - the text node corresponding to the opener. - - + Remove any delimiters between the opener and closer from - the delimiter stack. - - + Remove 1 (for regular emph) or 2 (for strong emph) delimiters - from the opening and closing text nodes. If they become empty - as a result, remove them and remove the corresponding element - of the delimiter stack. If the closing node is removed, reset - `current_position` to the next element in the stack. - -- If none is found: - - + Set `openers_bottom` to the element before `current_position`. - (We know that there are no openers for this kind of closer up to and - including this point, so this puts a lower bound on future searches.) - - + If the closer at `current_position` is not a potential opener, - remove it from the delimiter stack (since we know it can't - be a closer either). - - + Advance `current_position` to the next element in the stack. - -After we're done, we remove all delimiters above `stack_bottom` from the -delimiter stack. - diff --git a/packages/markdown-transform/test/data/acceptance/sample.html b/packages/markdown-transform/test/data/acceptance/sample.html index 382bfe28..ae8e490b 100644 --- a/packages/markdown-transform/test/data/acceptance/sample.html +++ b/packages/markdown-transform/test/data/acceptance/sample.html @@ -3,21 +3,21 @@

    Heading

    And below is a clause.

    -
    +

    Acceptance of Delivery.

    -

    "Party A" will be deemed to have completed its delivery obligations -if in "Party B"'s opinion, the "Widgets" satisfies the -Acceptance Criteria, and "Party B" notifies "Party A" in writing -that it is accepting the "Widgets".

    +

    "Party A" will be deemed to have completed its delivery obligations +if in "Party B"'s opinion, the "Widgets" satisfies the +Acceptance Criteria, and "Party B" notifies "Party A" in writing +that it is accepting the "Widgets".

    Inspection and Notice.

    -

    "Party B" will have 10 Business Days to inspect and -evaluate the "Widgets" on the delivery date before notifying -"Party A" that it is either accepting or rejecting the -"Widgets".

    +

    "Party B" will have 10 Business Days to inspect and +evaluate the "Widgets" on the delivery date before notifying +"Party A" that it is either accepting or rejecting the +"Widgets".

    Acceptance Criteria.

    -

    The "Acceptance Criteria" are the specifications the "Widgets" -must meet for the "Party A" to comply with its requirements and -obligations under this agreement, detailed in "Attachment X", attached +

    The "Acceptance Criteria" are the specifications the "Widgets" +must meet for the "Party A" to comply with its requirements and +obligations under this agreement, detailed in "Attachment X", attached to this agreement.

    More text