Merge pull request #147 from bdesham/include-images-inline

JSON Feed: Include images in an item's "content_html" (fixes #146)
snarfed · May 13, 2018 · 8d51bfd · 8d51bfd
2 parents 4ca6cff + 9e0c1d1
commit 8d51bfd
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 2 deletions.
diff --git a/granary/jsonfeed.py b/granary/jsonfeed.py
@@ -14,6 +14,9 @@
 # allowed ActivityStreams objectTypes for attachments
 ATTACHMENT_TYPES = {'image', 'audio', 'video'}
 
+# format string for the HTML appended to a JSON Feed item's content_html to show its image
+HTML_IMAGE_TEMPLATE = '<p><img src="{}"/></p>'
+
 
 def activities_to_jsonfeed(activities, actor=None, title=None, feed_url=None,
                            home_page_url=None):
@@ -52,7 +55,18 @@ def actor_name(obj):
     if obj.get('objectType') == 'person':
       continue
     author = obj.get('author', {})
+
     content = obj.get('content')
+    # The JSON Feed spec (https://jsonfeed.org/version/1#items) says that the
+    # URL from the "image" property may also appear in "content_html", in which
+    # case it should be interpreted as the "main, featured image" of the
+    # post. It does not specify the behavior or semantics in the case that the
+    # image does *not* appear in "content_html", but currently at least one
+    # feed reader (Feedbin) will not display the image as part of the post
+    # content unless it is explicitly included in "content_html".
+    if content and image_url(obj):
+      content += HTML_IMAGE_TEMPLATE.format(image_url(obj))
+
     obj_title = obj.get('title') or obj.get('displayName')
     item = {
       'id': obj.get('id') or obj.get('url'),
@@ -138,11 +152,26 @@ def attachment(jf):
       as1['url'] = url
     return as1
 
+  def content(item):
+    """Returns an item's content, minus any trailing Granary-generated <img>."""
+    html = item.get('content_html')
+    if not html:
+      return item.get('content_text')
+
+    # If the content_html ends with an <img> tag that could have been generated
+    # by Granary (and which is redundant with the entry's "image" property),
+    # remove that tag.
+    image = item.get('image')
+    suffix = HTML_IMAGE_TEMPLATE.format(image) if image else ''
+    if suffix and html.endswith(suffix):
+      html = html[:-len(suffix)]
+    return html
+
   activities = [{'object': {
     'objectType': 'article' if item.get('title') else 'note',
     'title': item.get('title'),
     'summary': item.get('summary'),
-    'content': item.get('content_html') or item.get('content_text'),
+    'content': content(item),
     'id': str(item.get('id') or ''),
     'published': item.get('date_published'),
     'updated': item.get('date_modified'),

diff --git a/granary/test/testdata/note.feed.json b/granary/test/testdata/note.feed.json
@@ -12,7 +12,7 @@
     "url": "http://example.com/blog-post-123",
     "image": "http://example.com/blog-post-123/image",
     "summary": "too cool to summarize",
-    "content_html": "A note. link too",
+    "content_html": "A note. link too<p><img src=\"http://example.com/blog-post-123/image\"/></p>",
     "date_published": "2012-02-22T20:26:41",
     "date_modified": "2013-10-25T10:31:30+00:00",
     "author": {