Skip to content

Commit

Permalink
Improve capitalization and other prose formatters (talonhub#1216)
Browse files Browse the repository at this point in the history
- Use Python's `.title()` to better handle hyphenated words.
- Make `CAPITALIZE_ALL_WORDS` (should really be named something like
`TITLE_CASE_ALL_WORDS`) a prose formatter and extend it to handle
punctuation.
- Don't change case of words that already include capital letters.
- Add tests.
  • Loading branch information
nriley authored Jul 22, 2023
1 parent 7525147 commit bb91bef
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 52 deletions.
8 changes: 2 additions & 6 deletions core/help/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def gui_formatters(gui: imgui.GUI):
for key, val in formatters_words.items():
gui.text(f"{val}: {key}")

gui.spacer()
gui.text("* prose formatter")
gui.spacer()
if gui.button("Help close"):
gui_formatters.hide()
Expand Down Expand Up @@ -544,12 +546,6 @@ def help_formatters(ab: dict):
global formatters_words
formatters_words = ab
reset()
# print("help_alphabet - alphabet gui_alphabet: {}".format(gui_alphabet.showing))
# print(
# "help_alphabet - gui_context_help showing: {}".format(
# gui_context_help.showing
# )
# )
hide_all_help_guis()
gui_formatters.show()
register_events(False)
Expand Down
150 changes: 106 additions & 44 deletions core/text/formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
import re
from typing import Union

from talon import Context, Module, actions
from talon import Context, Module, actions, app
from talon.grammar import Phrase

ctx = Context()
key = actions.key
edit = actions.edit

words_to_keep_lowercase = (
"a an the at by for in is of on to up and as but or nor".split()
"a an and as at but by en for if in nor of on or per the to v via vs".split()
)

# The last phrase spoken, without & with formatting. Used for reformatting.
Expand Down Expand Up @@ -89,11 +89,47 @@ def first_vs_rest(first_func, rest_func=lambda w: w):
return lambda i, word, _: first_func(word) if i == 0 else rest_func(word)


def title_case():
last_word = None

def title_case_word(i, word, is_end):
nonlocal last_word

if word.islower() and ( # contains only lowercase letters
word not in words_to_keep_lowercase
or i == 0
or is_end
or not last_word[
-1
].isalnum() # title case subsequent words if they follow punctuation
):
if "-" in word:
components = word.split("-")
title_case_component = title_case()
components = [
title_case_component(j, component, j == len(components) - 1)
for j, component in enumerate(components)
]
word = "-".join(components)
elif word_start := re.match(r"\W*", word).end():
# word begins with non-alphanumeric characters
word = word[:word_start] + word[word_start:].capitalize()
else:
word = word.capitalize()

last_word = word

return word

return title_case_word


def every_word(word_func):
"""Apply one function to every word."""
return lambda i, word, _: word_func(word)


# All formatters (code and prose)
formatters_dict = {
"NOOP": (SEP, lambda i, word, _: word),
"DOUBLE_UNDERSCORE": (NOSEP, first_vs_rest(lambda w: f"__{w}__")),
Expand Down Expand Up @@ -125,43 +161,53 @@ def every_word(word_func):
"DOT_SEPARATED": words_with_joiner("."),
"DOT_SNAKE": (NOSEP, lambda i, word, _: "." + word if i == 0 else "_" + word),
"SLASH_SEPARATED": (NOSEP, every_word(lambda w: "/" + w)),
"CAPITALIZE_FIRST_WORD": (SEP, first_vs_rest(lambda w: w[:1].upper() + w[1:])),
"CAPITALIZE_ALL_WORDS": (
"CAPITALIZE_FIRST_WORD": (
SEP,
lambda i, word, _: word[:1].upper() + word[1:]
if i == 0 or word not in words_to_keep_lowercase
else word,
first_vs_rest(lambda w: title_case()(0, w, True)),
),
"CAPITALIZE_ALL_WORDS": (SEP, title_case()),
}

# This is the mapping from spoken phrases to formatters
formatters_words = {
"all cap": formatters_dict["ALL_CAPS"],
"all down": formatters_dict["ALL_LOWERCASE"],
"camel": formatters_dict["PRIVATE_CAMEL_CASE"],
"dotted": formatters_dict["DOT_SEPARATED"],
"dub string": formatters_dict["DOUBLE_QUOTED_STRING"],
"dunder": formatters_dict["DOUBLE_UNDERSCORE"],
"hammer": formatters_dict["PUBLIC_CAMEL_CASE"],
"kebab": formatters_dict["DASH_SEPARATED"],
"packed": formatters_dict["DOUBLE_COLON_SEPARATED"],
"padded": formatters_dict["SPACE_SURROUNDED_STRING"],
"slasher": formatters_dict["SLASH_SEPARATED"],
"smash": formatters_dict["NO_SPACES"],
"snake": formatters_dict["SNAKE_CASE"],
"string": formatters_dict["SINGLE_QUOTED_STRING"],
"title": formatters_dict["CAPITALIZE_ALL_WORDS"],
# Mapping from spoken phrases to formatter names
code_formatter_names = {
"all cap": "ALL_CAPS",
"all down": "ALL_LOWERCASE",
"camel": "PRIVATE_CAMEL_CASE",
"dotted": "DOT_SEPARATED",
"dub string": "DOUBLE_QUOTED_STRING",
"dunder": "DOUBLE_UNDERSCORE",
"hammer": "PUBLIC_CAMEL_CASE",
"kebab": "DASH_SEPARATED",
"packed": "DOUBLE_COLON_SEPARATED",
"padded": "SPACE_SURROUNDED_STRING",
"slasher": "SLASH_SEPARATED",
"smash": "NO_SPACES",
"snake": "SNAKE_CASE",
"string": "SINGLE_QUOTED_STRING",
}
prose_formatter_names = {
"say": "NOOP",
"speak": "NOOP",
"sentence": "CAPITALIZE_FIRST_WORD",
"title": "CAPITALIZE_ALL_WORDS",
}
# Mapping from spoken phrases to formatters
formatter_words = {
phrase: formatters_dict[name]
for phrase, name in (code_formatter_names | prose_formatter_names).items()
}

all_formatters = {}
all_formatters.update(formatters_dict)
all_formatters.update(formatters_words)
# Allow referencing formatters by either their names or spoken forms
all_prose_formatters = [
item for sublist in prose_formatter_names.items() for item in sublist
]
all_formatters = formatters_dict | formatter_words

mod = Module()
mod.list("formatters", desc="list of formatters")
mod.list("formatters", desc="list of all formatters (code and prose)")
mod.list("code_formatter", desc="list of formatters typically applied to code")
mod.list(
"prose_formatter",
desc="words to start dictating prose, and the formatter they apply",
"prose_formatter", desc="list of prose formatters (words to start dictating prose)"
)


Expand All @@ -171,14 +217,20 @@ def formatters(m) -> str:
return ",".join(m.formatters_list)


@mod.capture(rule="{self.code_formatter}+")
def code_formatters(m) -> str:
"Returns a comma-separated string of code formatters e.g. 'SNAKE,DUBSTRING'"
return ",".join(m.code_formatter_list)


@mod.capture(
# Note that if the user speaks something like "snake dot", it will
# insert "dot" - otherwise, they wouldn't be able to insert punctuation
# words directly.
rule="<self.formatters> <user.text> (<user.text> | <user.formatter_immune>)*"
)
def format_text(m) -> str:
"Formats the text and returns a string"
"""Formats text and returns a string"""
out = ""
formatters = m[0]
for chunk in m[1:]:
Expand All @@ -189,6 +241,14 @@ def format_text(m) -> str:
return out


@mod.capture(
rule="<self.code_formatters> <user.text> (<user.text> | <user.formatter_immune>)*"
)
def format_code(m) -> str:
"""Formats code and returns a string"""
return format_text(m)


class ImmuneString:
"""Wrapper that makes a string immune from formatting."""

Expand Down Expand Up @@ -248,29 +308,34 @@ def formatters_reformat_selection(formatters: str) -> str:
"""Reformats the current selection."""
selected = edit.selected_text()
if not selected:
print("Asked to reformat selection, but nothing selected!")
app.notify("Asked to reformat selection, but nothing selected!")
return
unformatted = unformat_text(selected)
if formatters not in all_prose_formatters:
selected = unformat_text(selected)
# Delete separately for compatibility with programs that don't overwrite
# selected text (e.g. Emacs)
edit.delete()
text = actions.self.formatted_text(unformatted, formatters)
text = actions.self.formatted_text(selected, formatters)
actions.insert(text)
return text

def get_formatters_words() -> dict:
"""returns a list of words currently used as formatters, and a demonstration string using those formatters"""
formatters_help_demo = {}
for name in sorted(set(formatters_words.keys())):
formatters_help_demo[name] = format_phrase_without_adding_to_history(
for name in sorted(set(formatter_words)):
demo = format_phrase_without_adding_to_history(
["one", "two", "three"], name
)
if name in prose_formatter_names:
name += " *"
formatters_help_demo[name] = demo
return formatters_help_demo

def reformat_text(text: str, formatters: str) -> str:
"""Reformat the text."""
unformatted = unformat_text(text)
return actions.user.formatted_text(unformatted, formatters)
if formatters not in all_prose_formatters:
text = unformat_text(text)
return actions.user.formatted_text(text, formatters)

def insert_many(strings: list[str]) -> None:
"""Insert a list of strings, sequentially."""
Expand All @@ -292,9 +357,6 @@ def unformat_text(text: str) -> str:
return unformatted.lower()


ctx.lists["self.formatters"] = formatters_words.keys()
ctx.lists["self.prose_formatter"] = {
"say": "NOOP",
"speak": "NOOP",
"sentence": "CAPITALIZE_FIRST_WORD",
}
ctx.lists["self.formatters"] = formatter_words.keys()
ctx.lists["self.code_formatter"] = code_formatter_names.keys()
ctx.lists["self.prose_formatter"] = prose_formatter_names.keys()
4 changes: 2 additions & 2 deletions core/text/text.talon
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ phrase <user.text> over:
insert(text)
{user.prose_formatter} <user.prose>$: user.insert_formatted(prose, prose_formatter)
{user.prose_formatter} <user.prose> over: user.insert_formatted(prose, prose_formatter)
<user.format_text>+$: user.insert_many(format_text_list)
<user.format_text>+ over: user.insert_many(format_text_list)
<user.format_code>+$: user.insert_many(format_code_list)
<user.format_code>+ over: user.insert_many(format_code_list)
<user.formatters> that: user.formatters_reformat_selection(user.formatters)
word <user.word>:
user.add_phrase_to_history(word)
Expand Down
62 changes: 62 additions & 0 deletions test/test_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,65 @@ def test_no_spaces():
result = formatters.Actions.formatted_text("hello world", "NO_SPACES")

assert result == "helloworld"

def test_capitalize_first_word():
result = formatters.Actions.formatted_text(
"hello world", "CAPITALIZE_FIRST_WORD"
)

assert result == "Hello world"

result = formatters.Actions.formatted_text(
"hEllo wOrld", "CAPITALIZE_FIRST_WORD"
)

assert result == "hEllo wOrld"

def test_capitalize_all_words():
result = formatters.Actions.formatted_text(
"hello world", "CAPITALIZE_ALL_WORDS"
)

assert result == "Hello World"

result = formatters.Actions.formatted_text(
"hEllo wOrld", "CAPITALIZE_ALL_WORDS"
)

assert result == "hEllo wOrld"

result = formatters.Actions.formatted_text(
"Hello to the world", "CAPITALIZE_ALL_WORDS"
)

assert result == "Hello to the World"

result = formatters.Actions.formatted_text(
"hello: the world", "CAPITALIZE_ALL_WORDS"
)

assert result == "Hello: The World"

result = formatters.Actions.formatted_text(
"down and up", "CAPITALIZE_ALL_WORDS"
)

assert result == "Down and Up"

result = formatters.Actions.formatted_text(
"down-and-up", "CAPITALIZE_ALL_WORDS"
)

assert result == "Down-and-Up"

result = formatters.Actions.formatted_text(
"it's good they’re Bill’s friends", "CAPITALIZE_ALL_WORDS"
)

assert result == "It's Good They’re Bill’s Friends"

result = formatters.Actions.formatted_text(
'"how\'s it going?"', "CAPITALIZE_ALL_WORDS"
)

assert result == '"How\'s It Going?"'

0 comments on commit bb91bef

Please sign in to comment.