Skip to content

Commit

Permalink
bluesky: convert HTML summary and content to plain text
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed Oct 5, 2023
1 parent 1259e63 commit a1d1592
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ Changelog
* Add new `blob_to_url` function.
* `from_as1`: add `out_type` kwarg to specify desired output type, eg `app.bsky.actor.profile` vs `app.bsky.actor.defs#profileViewBasic` vs `app.bsky.actor.defs#profileView`.
* Delete `as1_to_profile`, switch `from_as1` to return `$type: app.bsky.actor.profile`.
* Convert HTML `summary` and `content` to plain text.
* `facebook`:
* Remove `Facebook.fql_stream_to_post`. [Facebook turned down FQL in 2016.](https://en.wikipedia.org/wiki/Facebook_Query_Language#History)

Expand Down
6 changes: 3 additions & 3 deletions granary/bluesky.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from oauth_dropins.webutil.util import trim_nulls

from . import as1
from .source import FRIENDS, Source, OMIT_LINK
from .source import FRIENDS, html_to_text, Source, OMIT_LINK

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -250,7 +250,7 @@ def from_as1(obj, out_type=None):
if type in as1.ACTOR_TYPES:
ret = {
'displayName': obj.get('displayName'),
'description': obj.get('summary'),
'description': html_to_text(obj.get('summary')),
}
if not out_type or out_type == 'app.bsky.actor.profile':
return trim_nulls({**ret, '$type': 'app.bsky.actor.profile'})
Expand Down Expand Up @@ -351,7 +351,7 @@ def from_as1(obj, out_type=None):
src = Bluesky('unused')
content = obj.get('content')
text = obj.get('summary') or content or obj.get('name') or ''
text = src.truncate(text, None, OMIT_LINK)
text = src.truncate(html_to_text(text), None, OMIT_LINK)

facets = []
if text == content:
Expand Down
32 changes: 28 additions & 4 deletions granary/tests/test_bluesky.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,16 +368,21 @@ def test_from_as1_post_with_author(self):
got = from_as1(POST_AUTHOR_AS, out_type='app.bsky.feed.defs#postView')
self.assert_equals(expected, got)

def test_from_as1_post_html_with_tag_indices_not_implemented(self):
def test_from_as1_post_html_skips_tag_indices(self):
post_as = copy.deepcopy(POST_AS)
post_as['object'].update({
'content': '<em>some html</em>',
'content_is_html': True,
'tags': [FACET_TAG],
# not set because content is HTML
# 'tags': [FACET_TAG],
})

with self.assertRaises(NotImplementedError):
from_as1(post_as)
# with self.assertRaises(NotImplementedError):
self.assert_equals({
'$type': 'app.bsky.feed.post',
'text': '_some html_',
'createdAt': '2007-07-07T03:04:05',
},from_as1(post_as))

def test_from_as1_post_without_tag_indices(self):
post_as = copy.deepcopy(POST_AS)
Expand All @@ -404,6 +409,16 @@ def test_from_as1_post_view_with_image(self):
got = from_as1(POST_AS_IMAGES, out_type='app.bsky.feed.defs#postView')
self.assert_equals(expected, got)

def test_from_as1_post_content_html(self):
  """HTML in a note's ``content`` comes out of ``from_as1`` as plain text."""
  note = {
    'objectType': 'note',
    'content': '<p>Some <br> <em>HTML</em></p>',
  }
  expected = {
    '$type': 'app.bsky.feed.post',
    # the <br> is expected as a newline and the <em> as _underscores_
    'text': 'Some\n_HTML_',
    'createdAt': '',
  }
  self.assertEqual(expected, from_as1(note))

def test_from_as1_object_vs_activity(self):
obj = {
'objectType': 'note',
Expand Down Expand Up @@ -454,6 +469,15 @@ def test_from_as1_actor_id_not_url(self):
'id': 'tag:foo.com,2001:bar',
}, out_type='app.bsky.actor.defs#profileView')['did'])

def test_from_as1_actor_description_html(self):
  """HTML in an actor's ``summary`` becomes a plain text ``description``."""
  actor = {
    'objectType': 'person',
    'summary': '<p>Some <br> <em>HTML</em></p>',
  }
  expected = {
    '$type': 'app.bsky.actor.profile',
    # the <br> is expected as a newline and the <em> as _underscores_
    'description': 'Some\n_HTML_',
  }
  self.assertEqual(expected, from_as1(actor))

def test_from_as1_composite_url(self):
self.assertEqual({
'$type': 'app.bsky.actor.defs#profileView',
Expand Down

0 comments on commit a1d1592

Please sign in to comment.