diff --git a/README.md b/README.md index 72eb3271..eb2db43f 100644 --- a/README.md +++ b/README.md @@ -314,6 +314,7 @@ Changelog * Gracefully handle when `content_html` and `content_text` are [incorrectly](https://jsonfeed.org/version/1#items) lists instead of strings. * HTML/microformats2: * Add `aria-hidden="true"` to empty links ([bridgy#947](https://github.com/snarfed/bridgy/issues/947)). + * Bug fix: escape `&`, `<`, and `>` characters in bare mf2 `content` properties ([aaronpk/XRay#102](https://github.com/aaronpk/XRay/issues/102)). * GitHub: * Handle [HTTP 451 Unavailable for Legal Reasons](https://en.wikipedia.org/wiki/HTTP_451) responses ([eg for DMCA takedowns](https://developer.github.com/changes/2016-03-17-the-451-status-code-is-now-supported/)) gracefully. * Instagram: diff --git a/granary/microformats2.py b/granary/microformats2.py index d367836e..e4566104 100644 --- a/granary/microformats2.py +++ b/granary/microformats2.py @@ -5,6 +5,7 @@ """ from collections import defaultdict import copy +import html import itertools import logging import urllib.parse @@ -122,7 +123,7 @@ def get_html(val): if isinstance(val, dict) and val.get('html'): return val['html'].strip() - return get_text(val) + return html.escape(get_text(val), quote=False) def get_text(val): diff --git a/granary/tests/test_testdata.py b/granary/tests/test_testdata.py index 67517f57..b1e78cd8 100644 --- a/granary/tests/test_testdata.py +++ b/granary/tests/test_testdata.py @@ -91,11 +91,11 @@ def rss_from_activities(activities): mappings = ( ('as.json', ['mf2-from-as.json', 'mf2.json'], microformats2.object_to_json, # doesn't handle h-feed yet - ('feed_with_audio_video')), + ('feed_with_audio_video',)), ('as.json', ['mf2-from-as.html', 'mf2.html'], microformats2.object_to_html, ()), ('mf2.json', ['as-from-mf2.json', 'as.json'], microformats2.json_to_object, # doesn't handle h-feed yet - ('feed_with_audio_video')), + ('feed_with_audio_video',)), ('mf2.json', ['mf2-from-json.html', 'mf2.html'], 
microformats2.json_to_html, # we do not format h-media photos properly in html ('note_with_composite_photo',)), diff --git a/granary/tests/testdata/article_escape_html_in_content.as.json b/granary/tests/testdata/article_escape_html_in_content.as.json new file mode 100644 index 00000000..714c1108 --- /dev/null +++ b/granary/tests/testdata/article_escape_html_in_content.as.json @@ -0,0 +1,5 @@ +{ + "objectType": "note", + "url": "https://twitter.com/simonw/status/1341785945364811776", + "content": "The \"Let's use client-side JavaScript rendering for everything! A web page should be a single <script> tag!\" crowd have held the developer marketing advantage for far too long" +} diff --git a/granary/tests/testdata/article_escape_html_in_content.mf2.html b/granary/tests/testdata/article_escape_html_in_content.mf2.html new file mode 100644 index 00000000..b26f2688 --- /dev/null +++ b/granary/tests/testdata/article_escape_html_in_content.mf2.html @@ -0,0 +1,7 @@ +
+ + https://twitter.com/simonw/status/1341785945364811776 +
+ The "Let's use client-side JavaScript rendering for everything! A web page should be a single &lt;script&gt; tag!" crowd have held the developer marketing advantage for far too long +
+
diff --git a/granary/tests/testdata/article_escape_html_in_content.mf2.json b/granary/tests/testdata/article_escape_html_in_content.mf2.json new file mode 100644 index 00000000..8c6d10e1 --- /dev/null +++ b/granary/tests/testdata/article_escape_html_in_content.mf2.json @@ -0,0 +1,7 @@ +{ + "type": ["h-entry"], + "properties": { + "url": ["https://twitter.com/simonw/status/1341785945364811776"], + "content": ["The \"Let's use client-side JavaScript rendering for everything! A web page should be a single