Skip to content

Commit

Permalink
update for twitter's new weighted char counting and 280 char limit
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed Nov 18, 2017
1 parent ae4e8ba commit 7cb54f6
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 6 deletions.
40 changes: 35 additions & 5 deletions brevity.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@
FORMAT_NOTE = 'note'
FORMAT_ARTICLE = 'article'

# From https://developer.twitter.com/en/docs/developer-utilities/twitter-text
# on 2017-11-18.
#
# Configuration for weighted character counting. A code point's weight divided
# by 'scale' is the amount it counts toward 'maxWeightedTweetLength'. Code
# points that fall inside one of 'ranges' (bounds inclusive) are meant to use
# that range's weight; all other code points use 'defaultWeight'.
WEIGHTS = {
    'version': 2,
    # Used as the default target_length of shorten().
    'maxWeightedTweetLength': 280,
    'scale': 100,
    'defaultWeight': 200,
    # Used as the default link_length of shorten().
    'transformedURLLength': 23,
    'ranges': [
        {'start': 0, 'end': 4351, 'weight': 100},  # U+0000 - U+10FF
        {'start': 8192, 'end': 8205, 'weight': 100},  # U+2000 - U+200D
        {'start': 8208, 'end': 8223, 'weight': 100},  # U+2010 - U+201F
        {'start': 8242, 'end': 8247, 'weight': 100},  # U+2032 - U+2037
    ],
}


class Token:
def __init__(self, tag, content, required=False):
Expand Down Expand Up @@ -124,13 +140,15 @@ def add_scheme(url):


def shorten(text, permalink=None, permashortlink=None, permashortcitation=None,
target_length=140, link_length=23, format=FORMAT_NOTE):
target_length=WEIGHTS['maxWeightedTweetLength'],
link_length=WEIGHTS['transformedURLLength'],
format=FORMAT_NOTE):
"""Prepare note text for publishing as a tweet. Ellipsize and add a
permalink or citation.
If the full text plus optional '(permashortlink)' or
'(permashortcitation)' can fit into the target length (defaults to
140 characters), it will return the composed text.
280 characters), it will return the composed text.
If format is FORMAT_ARTICLE, text is taken to be the title of a longer
article. It will be formatted as '[text]: [permalink]'. The values of
Expand All @@ -153,7 +171,7 @@ def shorten(text, permalink=None, permashortlink=None, permashortcitation=None,
:param string permashortcitation: Citation to the original note, e.g.
'ttk.me t4_f2', an alternative to permashortlink. If provided will be
added in parentheses to the end of all notes. (optional)
:param int target_length: The target overall length (default = 140)
:param int target_length: The target overall length (default = 280)
:param int link_length: The t.co length for a URL (default = 23)
:param string format: one of the FORMAT_ constants that determines
whether to format the text like a note or an article (default = FORMAT_NOTE)
def truncate_to_nearest_word(text, length):
    """Truncate text at a word boundary so it fits in a weighted length.

    Trailing whitespace is stripped first. If the stripped text fits, it is
    returned unchanged. Otherwise the text is cut at the last delimiter at or
    before the first character that no longer fits, and any delimiters left
    at the end of the result are stripped. If no delimiter is found, the
    longest prefix that fits is returned.

    :param string text: the text to truncate
    :param int length: the maximum weighted length of the result
    :returns: the (possibly truncated) text
    """
    delimiters = ',.;: \t\r\n'
    # try stripping trailing whitespace first
    text = text.rstrip()

    # Find the index of the first character that pushes the weighted length
    # past the limit. Summing char_length here is the same measurement that
    # str_length makes, but it also tells us *where* the text stops fitting.
    # Indexing by `length` directly is wrong once characters can weigh more
    # than one unit: it may point past the end of the string or keep a prefix
    # that is still too long.
    used = 0
    for cut, char in enumerate(text):
        used += char_length(char)
        if used > length:
            break
    else:
        # Never exceeded the limit (or text is empty): everything fits.
        return text

    # walk backwards until we find a delimiter
    for j in range(cut, -1, -1):
        if text[j] in delimiters:
            return text[:j].rstrip(delimiters)
    return text[:cut]

def char_length(char):
    """Return the weighted length of a single character.

    The character's code point is looked up in WEIGHTS['ranges'] (bounds
    inclusive). The first range that contains it supplies the weight;
    WEIGHTS['defaultWeight'] is used when no range matches. The weight is
    then divided by WEIGHTS['scale'].

    :param string char: a single character
    :returns: the weighted length of the character
    """
    point = ord(char)
    for weight_range in WEIGHTS['ranges']:
        if weight_range['start'] <= point <= weight_range['end']:
            weight = weight_range['weight']
            # Stop at the first match so later ranges (or the default below)
            # cannot overwrite the weight we just found.
            break
    else:
        # Only reached when the loop finished without a match.
        weight = WEIGHTS['defaultWeight']
    return weight / WEIGHTS['scale']

def str_length(val):
    """Return the weighted length of a string.

    This is the sum of char_length over every character in val.

    :param string val: the string to measure
    :returns: the total weighted length
    """
    total = 0
    for char in val:
        total += char_length(char)
    return total

def token_length(token):
    """Return how much a token counts toward the target length.

    Tokens tagged 'link' always count as link_length, whatever their
    content. All other tokens are measured with str_length, so their
    characters are weighted rather than simply counted.

    :param Token token: the token to measure
    :returns: the length of the token
    """
    if token.tag == 'link':
        return link_length
    # Use the weighted measurement; a plain len() here would undercount
    # characters that weigh more than one unit.
    return str_length(token.content)

def total_length(tokens):
    """Return the combined length of tokens, each measured by token_length.

    :param tokens: an iterable of tokens
    :returns: the sum of the individual token lengths
    """
    return sum(map(token_length, tokens))
Expand Down
2 changes: 1 addition & 1 deletion testcases
Submodule testcases updated 1 files
+35 −16 tests.json

0 comments on commit 7cb54f6

Please sign in to comment.