From 6a95384b6634e2b548036f94b9e18913a52e8847 Mon Sep 17 00:00:00 2001 From: Peter Velkov Date: Tue, 12 Mar 2024 15:11:50 +0200 Subject: [PATCH] Fix ExpensiMark: prevent html tags in alt attribute Related to: https://github.com/Expensify/expensify-common/pull/658#discussion_r1508088239 Content intended for the alt attribute in images is being incorrectly parsed from Markdown to HTML if it contains MD special characters --- __tests__/ExpensiMark-HTML-test.js | 15 ++++++-------- __tests__/ExpensiMark-Markdown-test.js | 6 ++++++ lib/ExpensiMark.js | 28 ++++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/__tests__/ExpensiMark-HTML-test.js b/__tests__/ExpensiMark-HTML-test.js index 8d918590..0e61ec58 100644 --- a/__tests__/ExpensiMark-HTML-test.js +++ b/__tests__/ExpensiMark-HTML-test.js @@ -1779,14 +1779,14 @@ describe('when should keep raw input flag is enabled', () => { }); }); }); - + test('Test code fence within inline code', () => { let testString = 'Hello world `(```test```)` Hello world'; expect(parser.replace(testString)).toBe('Hello world `(
test
)` Hello world'); - + testString = 'Hello world `(```test\ntest```)` Hello world'; expect(parser.replace(testString)).toBe('Hello world `(
test
test
)` Hello world'); - + testString = 'Hello world ```(`test`)``` Hello world'; expect(parser.replace(testString)).toBe('Hello world
(`test`)
Hello world'); @@ -1893,12 +1893,9 @@ describe('Image markdown conversion to html tag', () => { expect(parser.replace(testString)).toBe(resultString); }); - // Currently any markdown used inside the square brackets is converted to html string in the alt attribute - // The attributes should only contain plain text, but it doesn't seem possible to convert markdown to plain text - // or let the parser know not to convert markdown to html for html attributes - xtest('Image with alt text containing markdown', () => { - const testString = '![*bold* _italic_ ~strike~](https://example.com/image.png)'; - const resultString = '*bold* _italic_ ~strike~'; + test('Image with alt text containing markdown', () => { + const testString = '![# fake-heading *bold* _italic_ ~strike~ [:-)]](https://example.com/image.png)'; + const resultString = '# fake-heading *bold* _italic_ ~strike~ [:-)]'; expect(parser.replace(testString)).toBe(resultString); }); diff --git a/__tests__/ExpensiMark-Markdown-test.js b/__tests__/ExpensiMark-Markdown-test.js index 2adce168..ada9fd90 100644 --- a/__tests__/ExpensiMark-Markdown-test.js +++ b/__tests__/ExpensiMark-Markdown-test.js @@ -769,4 +769,10 @@ describe('Image tag conversion to markdown', () => { const resultString = '![https://example.com/image.png](https://example.com/image.png)'; expect(parser.htmlToMarkdown(testString)).toBe(resultString); }); + + test('Image with alt text containing escaped markdown', () => { + const testString = '*bold* _italic_ ~strike~'; + const resultString = '![*bold* _italic_ ~strike~](https://example.com/image.png)'; + expect(parser.htmlToMarkdown(testString)).toBe(resultString); + }); }); diff --git a/lib/ExpensiMark.js b/lib/ExpensiMark.js index 4a23de8f..4b98583c 100644 --- a/lib/ExpensiMark.js +++ b/lib/ExpensiMark.js @@ -113,12 +113,13 @@ export default class ExpensiMark { * Converts markdown style images to img tags e.g. ![Expensify](https://www.expensify.com/attachment.png) * We need to convert before linking rules since they will not try to create a link from an existing img * tag. + * Additional sanitization is done to the alt attribute to prevent parsing it further to html by later rules. */ { name: 'image', regex: MARKDOWN_IMAGE_REGEX, - replacement: (match, g1, g2) => `${g1}`, - rawInputReplacement: (match, g1, g2) => `${g1}` + replacement: (match, g1, g2) => `${this.escapeMarkdownEntities(g1)}`, + rawInputReplacement: (match, g1, g2) => `${this.escapeMarkdownEntities(g1)}` }, /** @@ -945,4 +946,27 @@ export default class ExpensiMark { const linksInNew = this.extractLinksInMarkdownComment(newComment); return linksInOld === undefined || linksInNew === undefined ? [] : _.difference(linksInOld, linksInNew); } + + /** + * Replace MD characters with their HTML entity equivalent + * @param {String} text + * @return {String} + */ + escapeMarkdownEntities(text) { + // A regex pattern matching special MD characters we'd like to escape + const pattern = /([*_{}[\]~])/g; + + // A map of MD characters to their HTML entity equivalent + const entities = { + '*': '*', + _: '_', + '{': '{', + '}': '}', + '[': '[', + ']': ']', + '~': '~', + }; + + return text.replace(pattern, char => entities[char] || char); + } }