From debfbfcf09749f020a9f4b7090cb7a81e0d32de0 Mon Sep 17 00:00:00 2001 From: Ryan Barrett Date: Fri, 8 Feb 2019 07:33:36 -0800 Subject: [PATCH] start on RSS output for #124 --- README.md | 1 + granary/rss.py | 121 ++++++++++++++++++ granary/tests/test_testdata.py | 9 +- .../testdata/feed_with_audio_video.as.json | 32 +++++ .../testdata/feed_with_audio_video.mf2.json | 52 ++++++++ .../testdata/feed_with_audio_video.rss.xml | 42 ++++++ requirements.freeze.txt | 1 + requirements.txt | 1 + setup.py | 1 + 9 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 granary/rss.py create mode 100644 granary/tests/testdata/feed_with_audio_video.as.json create mode 100644 granary/tests/testdata/feed_with_audio_video.mf2.json create mode 100644 granary/tests/testdata/feed_with_audio_video.rss.xml diff --git a/README.md b/README.md index 5fcc4870..40086bef 100644 --- a/README.md +++ b/README.md @@ -303,6 +303,7 @@ On the open source side, there are many related projects. [php-mf2-shim](https:/ Changelog --- ### 1.15 - unreleased +* Add RSS 2.0 output! ([#124](https://github.com/snarfed/granary/issues/124)) * All silos: * Switch users' primary URLs from web site to silo profile ([#158](https://github.com/snarfed/granary/issues/158)). * GitHub: diff --git a/granary/rss.py b/granary/rss.py new file mode 100644 index 00000000..c84e4a4e --- /dev/null +++ b/granary/rss.py @@ -0,0 +1,121 @@ +"""Convert between ActivityStreams and RSS 2.0. + +RSS 2.0 spec: http://www.rssboard.org/rss-specification +""" +from __future__ import absolute_import, unicode_literals +from builtins import str +from past.builtins import basestring + +import mimetypes + +from feedgen.feed import FeedGenerator +import mf2util +from oauth_dropins.webutil import util + +from . 
# allowed ActivityStreams objectTypes for media enclosures
ENCLOSURE_TYPES = {'audio', 'video'}


def from_activities(activities, actor=None, title=None, description=None,
                    feed_url=None, home_page_url=None, image_url=None):
  """Converts ActivityStreams activities to an RSS 2.0 feed.

  Activities whose object has objectType 'person' are skipped. Attachments
  that are audio or video (by objectType, or by the MIME type guessed from
  their URL) are emitted as enclosures, and the feed then also gets iTunes
  podcast metadata.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    description: string, the feed description
    feed_url: string, the URL of this RSS feed, if any
    home_page_url: string, the home page URL
    image_url: string, the URL of an image representing this feed

  Returns:
    the RSS 2.0 XML document, as returned by feedgen's
    FeedGenerator.rss_str(pretty=True). This is an encoded byte string;
    decode it (UTF-8) to get text.

  Raises:
    TypeError: if activities is not iterable, or is a dict or a string
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  # dicts and strings are iterable, but iterating them doesn't yield activities
  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  fg = FeedGenerator()
  fg.id(feed_url)
  fg.link(href=feed_url, rel='self')
  fg.link(href=home_page_url, rel='alternate')
  fg.title(title)
  fg.description(description)
  fg.generator('granary', uri='https://granary.io/')
  if image_url:
    fg.image(image_url)

  # newest published/updated timestamp across all items
  latest = None
  # Feed-wide, so it's initialized before the loop: it must be defined even
  # when there are no items, and must stay set once *any* item has an
  # enclosure, not just the last one.
  feed_has_enclosure = False

  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    item = fg.add_entry()
    url = obj.get('url')
    item.id(obj.get('id') or url)
    item.link(href=url)
    item.guid(url, permalink=True)

    item.title(obj.get('title') or obj.get('displayName'))
    content = microformats2.render_content(
      obj, include_location=True, render_attachments=False) or obj.get('summary')
    if content:
      item.content(content, type='CDATA')

    # Tags are plain dicts, so use .get() rather than attribute access.
    # Likes, reactions and shares are responses, not categories.
    item.category([
      {'term': tag.get('displayName')}
      for tag in obj.get('tags') or []
      if tag.get('displayName')
      and tag.get('verb') not in ('like', 'react', 'share')])

    author = obj.get('author') or {}
    item.author({
      'name': author.get('displayName') or author.get('username'),
      'uri': author.get('url'),
    })

    for prop in 'published', 'updated':
      val = obj.get(prop)
      if val:
        dt = util.parse_iso8601(val)
        # item.published(dt) or item.updated(dt)
        getattr(item, prop)(dt)
        if not latest or dt > latest:
          latest = dt

    item_has_podcast = False
    for att in obj.get('attachments') or []:
      stream = util.get_first(att, 'stream') or att
      if not stream:
        continue

      stream_url = stream.get('url')
      mime = mimetypes.guess_type(stream_url)[0] if stream_url else None
      if (att.get('objectType') in ENCLOSURE_TYPES or
          mime and mime.split('/')[0] in ENCLOSURE_TYPES):
        feed_has_enclosure = True
        item.enclosure(url=stream_url, type=mime)  # TODO: length (bytes)

        # Load the extension at most once per item.
        # NOTE(review): feedgen appears to raise ImportError when an
        # extension is loaded twice on the same entry - confirm.
        if not item_has_podcast:
          item.load_extension('podcast')
          item_has_podcast = True
        duration = stream.get('duration')
        if duration:
          item.podcast.itunes_duration(duration)

  if feed_has_enclosure:
    fg.load_extension('podcast')
    if actor:
      fg.podcast.itunes_author(actor.get('displayName') or actor.get('username'))
    fg.podcast.itunes_image(image_url)
    if description:
      fg.podcast.itunes_subtitle(description)
    fg.podcast.itunes_explicit('no')
    fg.podcast.itunes_block(False)

  if latest:
    # use the newest timestamp seen, not merely the last one parsed
    fg.lastBuildDate(latest)

  return fg.rss_str(pretty=True)
['mf2-from-as.json', 'mf2.json'], microformats2.object_to_json, ()), @@ -94,6 +100,7 @@ def html_to_activity(html): ('feed.json', ['as-from-feed.json', 'as.json'], jsonfeed_to_activity, ()), ('as.json', ['as2-from-as.json', 'as2.json'], as2.from_as1, ()), ('as2.json', ['as-from-as2.json', 'as.json'], as2.to_as1, ()), + ('as.json', ['rss.xml'], rss_from_activities, ()), ) test_funcs = {} diff --git a/granary/tests/testdata/feed_with_audio_video.as.json b/granary/tests/testdata/feed_with_audio_video.as.json new file mode 100644 index 00000000..46cc04c9 --- /dev/null +++ b/granary/tests/testdata/feed_with_audio_video.as.json @@ -0,0 +1,32 @@ +[{ + "url": "http://podcast/post", + "objectType": "article", + "displayName": "i'm ready to speak", + "content": "

some HTML

", + "published": "2012-12-05T00:58:26+00:00", + "attachments": [{ + "stream": { + "url": "http://a/podcast.mp3", + "duration": 328 + }, + "objectType": "audio" + }] +}, +{ + "url": "http://vidjo/post", + "objectType": "article", + "displayName": "i'm ready to perform", + "summary": "other thing", + "updated": "2012-12-06T00:58:26+00:00", + "attachments": [{ + "stream": { + "url": "http://a/vidjo.mov", + "duration": 428 + }, + "objectType": "video" + }], + "stream": [{ + "url": "http://a/vidjo.mov", + "duration": 428 + }] +}] diff --git a/granary/tests/testdata/feed_with_audio_video.mf2.json b/granary/tests/testdata/feed_with_audio_video.mf2.json new file mode 100644 index 00000000..394aab95 --- /dev/null +++ b/granary/tests/testdata/feed_with_audio_video.mf2.json @@ -0,0 +1,52 @@ +{ + "items": [{ + "type": ["h-feed"], + "lang": "en", + "properties": { + "name": ["Pawd Kaast"], + "summary": ["a pawd kaast by meee"], + "photo": ["https://cover/"], + "author": [{ + "type": ["h-card"], + "properties": { + "name": ["Meee"], + "photo": ["https://photo/of/meee"], + "url": ["https://meee.com"], + "bio": [{ + "html": "my bio", + "value": "my bio" + }] + }, + "value": "Meeeeee" + }] + }, + "children": [{ + "type": ["h-entry"], + "properties": { + "url": ["http://a/podcast"], + "name": ["i'm ready to speak"], + "audio": ["http://a/podcast.mp3"], + "duration": ["328"], + "size": ["7.77mb"], + "summary": [{ + "html": "something", + "value": "something" + }] + } + }, + { + "type": ["h-entry"], + "properties": { + "name": ["i'm ready to perform"], + "url": ["http://a/vidjo"], + "video": ["http://a/vidjo.mov"], + "duration": ["428"], + "size": ["8.88mb"], + "summary": [{ + "html": "other thing", + "value": "other thing" + }] + } + }] + }] +} diff --git a/granary/tests/testdata/feed_with_audio_video.rss.xml b/granary/tests/testdata/feed_with_audio_video.rss.xml new file mode 100644 index 00000000..e417325b --- /dev/null +++ b/granary/tests/testdata/feed_with_audio_video.rss.xml @@ 
-0,0 +1,42 @@ + + + + Stuff + http://site/ + some stuff by meee + + http://www.rssboard.org/rss-specification + granary + + http://site/logo.png + Stuff + http://site/ + + Thu, 06 Dec 2012 00:58:26 +0000 + Martin Smith + no + + no + some stuff by meee + + + i'm ready to perform + http://vidjo/post + other thing + http://vidjo/post + + 428 + + + + i'm ready to speak + http://podcast/post + <p>some HTML</p> + http://podcast/post + + Wed, 05 Dec 2012 00:58:26 +0000 + 328 + + + + diff --git a/requirements.freeze.txt b/requirements.freeze.txt index 0cf98831..c40d034b 100644 --- a/requirements.freeze.txt +++ b/requirements.freeze.txt @@ -4,6 +4,7 @@ certifi==2018.4.16 chardet==3.0.4 coverage==4.0.3 -e git+https://github.com/snarfed/gdata-python-client-1.git@1df4e1efea7e5cf2754bc7eec6c1ab48ab09e3b1#egg=gdata +feedgen==0.7.0 future==0.16.0 google-api-python-client==1.7.4 html2text==2018.1.9 diff --git a/requirements.txt b/requirements.txt index 8237b6f4..2c0a2502 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ # Keep in sync with setup.py's install_requires! beautifulsoup4 brevity>=0.2.17 +feedgen>=0.7.0 future html2text jinja2 diff --git a/setup.py b/setup.py index 7ccd4e2d..818257de 100644 --- a/setup.py +++ b/setup.py @@ -46,6 +46,7 @@ def __init__(self, *args, **kwargs): # Keep in sync with requirements.txt! 'beautifulsoup4', 'brevity>=0.2.17', + 'feedgen>=0.7.0', 'future', 'html2text', 'jinja2',