Skip to content

Commit

Permalink
bluesky truncation bug fix: use the appropriate lexicon, post vs profile
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed Jun 13, 2024
1 parent 25db712 commit 7d7e377
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 18 deletions.
24 changes: 15 additions & 9 deletions granary/bluesky.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ def from_as1(obj, out_type=None, blobs=None, client=None, original_fields_prefix
# maybe [Video] label, link that with a facet
text = Bluesky('unused').truncate(
full_text, original_post_text_suffix, include_link=include_link,
punctuation=('', ''))
punctuation=('', ''), type=type)
truncated = text != full_text
if truncated:
text_byte_end = len(text.split(ELLIPSIS, maxsplit=1)[0].encode())
Expand Down Expand Up @@ -1436,7 +1436,7 @@ class Bluesky(Source):
DOMAIN = 'bsky.app'
BASE_URL = 'https://bsky.app'
NAME = 'Bluesky'
TRUNCATE_TEXT_LENGTH = LEXRPC_BASE.defs['app.bsky.actor.profile']['record']['properties']['description']['maxGraphemes']
TRUNCATE_TEXT_LENGTH = None # different for post text vs profile description
POST_ID_RE = AT_URI_PATTERN
TYPE_LABELS = {
'post': 'post',
Expand Down Expand Up @@ -1827,10 +1827,6 @@ def _create(self, obj, preview=None, include_link=OMIT_LINK, ignore_formatting=F

# truncate and ellipsize content if necessary
url = obj.get('url')
content = self.truncate(content, url, include_link=include_link, type=type)

# TODO linkify mentions and hashtags
preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

post_label = f"{self.NAME} {self.TYPE_LABELS['post']}"

Expand Down Expand Up @@ -1878,10 +1874,13 @@ def _create(self, obj, preview=None, include_link=OMIT_LINK, ignore_formatting=F
'url': at_uri_to_web_url(repost_atp['subject']['uri']) + '/reposted-by'
})

elif (type in ('note', 'article') or is_reply or
elif (type in as1.POST_TYPES or is_reply or
(type == 'activity' and verb == 'post')): # probably a bookmark
# TODO: add bookmarked URL and facet
# tricky because we only want to do that if it's not truncated away
content = self.truncate(content, url, include_link=include_link, type='note')
# TODO linkify mentions and hashtags
preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)
data = {'status': content}
if is_reply and base_url:
preview_description += f"<span class=\"verb\">{self.TYPE_LABELS['comment']}</span> to <a href=\"{base_url}\">this {self.TYPE_LABELS['post']}</a>:"
Expand Down Expand Up @@ -2004,11 +2003,18 @@ def upload_media(self, media):

return blobs

def truncate(self, *args, **kwargs):
def truncate(self, *args, type=None, **kwargs):
"""Thin wrapper around :meth:`Source.truncate` that sets default kwargs."""
if type in as1.ACTOR_TYPES:
length = LEXRPC_BASE.defs['app.bsky.actor.profile']['record']['properties']['description']['maxGraphemes']
elif type in POST_TYPES:
length = LEXRPC_BASE.defs['app.bsky.feed.post']['record']['properties']['text']['maxGraphemes']
else:
assert False, f'unexpected type {type}'

kwargs = {
'include_link': INCLUDE_LINK,
'target_length': self.TRUNCATE_TEXT_LENGTH,
'target_length': length,
'link_length': None,
'ellipsis': ' […]',
**kwargs,
Expand Down
1 change: 0 additions & 1 deletion granary/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,6 @@ def _content_for_create(self, obj, ignore_formatting=False, prefer_name=False,
ignore_formatting (bool): whether to use content text as is, instead of
converting its HTML to plain text styling (newlines, etc.)
prefer_name (bool): whether to prefer ``displayName`` to ``content``
strip_first_video_tag (bool): if true, removes the first ``<video>`` tag.
useful when it will be uploaded and attached to the post natively in the
silo.
Expand Down
33 changes: 25 additions & 8 deletions granary/tests/test_bluesky.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
did_web_to_url,
from_as1,
from_as1_to_strong_ref,
LEXRPC_BASE,
MAX_IMAGES,
NO_AUTHENTICATED_LABEL,
to_as1,
Expand Down Expand Up @@ -778,7 +779,8 @@ def test_from_as1_post_without_tag_indices(self):
# no facet
self.assert_equals(POST_BSKY, self.from_as1(post_as))

@patch.object(Bluesky, 'TRUNCATE_TEXT_LENGTH', 15)
@patch.dict(LEXRPC_BASE.defs['app.bsky.feed.post']['record']['properties']['text'],
maxGraphemes=15)
def test_from_as1_post_truncate_adds_link_embed(self):
self.assert_equals({
'$type': 'app.bsky.feed.post',
Expand All @@ -801,7 +803,18 @@ def test_from_as1_post_truncate_adds_link_embed(self):
'content': 'more than ten chars long',
}))

@patch.object(Bluesky, 'TRUNCATE_TEXT_LENGTH', 45)
def test_from_as1_post_truncate_full_length(self):
# check that we use the app.bsky.feed.post limit, not app.bsky.actor.profile's
# https://github.com/snarfed/bridgy-fed/issues/1128
content = 'Das #BSW spricht den ganzen Tag von Frieden und Diplomatie. Beide Seiten anhören angeblich. Aber wenn das Opfer des illegalen Angriffskriegs was zu sagen hat, dann verlasssen sie aus Protest den Saal? Deutlicher kann man nicht machen, dass man möchte, dass der Angreifer gewinnt.'
self.assert_equals(content, self.from_as1({
'objectType': 'note',
'url': 'http://my.inst/post',
'content': content,
})['text'])

@patch.dict(LEXRPC_BASE.defs['app.bsky.feed.post']['record']['properties']['text'],
maxGraphemes=45)
def test_from_as1_post_with_images_truncated_puts_original_post_link_in_text(self):
content = 'hello hello hello hello hello hello hello hello hello'
self.assert_equals({
Expand All @@ -826,7 +839,8 @@ def test_from_as1_post_with_images_truncated_puts_original_post_link_in_text(sel
'url': 'http://my.inst/post',
}, blobs={NEW_BLOB_URL: NEW_BLOB}))

@patch.object(Bluesky, 'TRUNCATE_TEXT_LENGTH', 51)
@patch.dict(LEXRPC_BASE.defs['app.bsky.feed.post']['record']['properties']['text'],
maxGraphemes=51)
def test_from_as1_post_with_images_video_truncated_original_post_link_in_text(self):
self.assert_equals({
**POST_BSKY_IMAGES,
Expand All @@ -852,7 +866,8 @@ def test_from_as1_post_with_images_video_truncated_original_post_link_in_text(se
}],
}, blobs={NEW_BLOB_URL: NEW_BLOB}))

@patch.object(Bluesky, 'TRUNCATE_TEXT_LENGTH', 40)
@patch.dict(LEXRPC_BASE.defs['app.bsky.feed.post']['record']['properties']['text'],
maxGraphemes=40)
def test_from_as1_post_with_images_removes_facets_beyond_truncation(self):
content = 'hello <a href="http://foo">link</a> goodbye goodbye goodbye goodbye'
self.assert_equals({
Expand All @@ -877,7 +892,8 @@ def test_from_as1_post_with_images_removes_facets_beyond_truncation(self):
'url': 'http://my.inst/post',
}, blobs={NEW_BLOB_URL: NEW_BLOB}))

@patch.object(Bluesky, 'TRUNCATE_TEXT_LENGTH', 40)
@patch.dict(LEXRPC_BASE.defs['app.bsky.feed.post']['record']['properties']['text'],
maxGraphemes=40)
def test_from_as1_post_with_images_truncates_facet_that_overlaps_truncation(self):
content = '<a href="http://foo">hello link text</a> goodbye goodbye goodbye goodbye'
self.assert_equals({
Expand Down Expand Up @@ -1194,7 +1210,8 @@ def test_from_as1_html_link(self):
'content': content,
}))

@patch.object(Bluesky, 'TRUNCATE_TEXT_LENGTH', 12)
@patch.dict(LEXRPC_BASE.defs['app.bsky.feed.post']['record']['properties']['text'],
maxGraphemes=12)
def test_from_as1_html_omit_link_facet_after_truncation(self):
content = 'foo bar <a href="http://post">baaaaaaaz</a>'
self.assert_equals({
Expand Down Expand Up @@ -2382,9 +2399,9 @@ def test_post_id(self):
with self.subTest(input=input):
self.assertEqual(expected, self.bs.post_id(input))

@patch.dict(LEXRPC_BASE.defs['app.bsky.feed.post']['record']['properties']['text'],
maxGraphemes=20)
def test_preview_post(self):
self.bs.TRUNCATE_TEXT_LENGTH = 20

for content, expected in (
('foo ☕ bar', 'foo ☕ bar'),
('too long, will be ellipsized', 'too long, will […]'),
Expand Down

0 comments on commit 7d7e377

Please sign in to comment.