-
Notifications
You must be signed in to change notification settings - Fork 110
/
Copy pathnerd-dictation.py
88 lines (67 loc) · 2.26 KB
/
nerd-dictation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# User configuration file typically located at `~/.config/nerd-dictation/nerd-dictation.py`
import re
# -----------------------------------------------------------------------------
# Replace Multiple Words
# Each entry pairs a phrase pattern (anchored on word boundaries) with the text
# that replaces it. Patterns are compiled once, when this file is loaded.
TEXT_REPLACE_REGEX = tuple(
    (re.compile(pattern), substitute)
    for pattern, substitute in (
        (r"\bdata type\b", "data-type"),
        (r"\bcopy on write\b", "copy-on-write"),
        (r"\bkey word\b", "keyword"),
    )
)
# -----------------------------------------------------------------------------
# Replace Single Words
# VOSK-API doesn't use capitals anywhere so they have to be explicitly added in.
# Words listed with the capitalization they should have in the output;
# each one is looked up by its lowercase form.
_CAPITALIZED_WORDS = ("I", "API", "Linux")
# Words to ignore entirely: they are replaced with an empty string.
_IGNORED_WORDS = ("um",)

WORD_REPLACE = {
    **{word.lower(): word for word in _CAPITALIZED_WORDS},
    **{word: "" for word in _IGNORED_WORDS},
}
# Regular expressions allow partial words to be replaced.
# Pattern/replacement pairs applied to individual words; compiled once, when
# this file is loaded.
WORD_REPLACE_REGEX = tuple(
    (re.compile(pattern), substitute)
    for pattern, substitute in (
        # Capitalize a leading "i'" (for example "i'm" becomes "I'm").
        (r"^i'(.*)", r"I'\1"),
    )
)
# -----------------------------------------------------------------------------
# Add Punctuation
# Both quote phrases produce the same symbol.
_DOUBLE_QUOTE = '"'

# Spoken phrases that become a symbol joined to the preceding word
# (the space in front of the phrase is removed).
CLOSING_PUNCTUATION = {
    "period": ".",
    "comma": ",",
    "question mark": "?",
    "close quote": _DOUBLE_QUOTE,
}

# Spoken phrases that become a symbol joined to the following word
# (the space after the phrase is removed).
OPENING_PUNCTUATION = {"open quote": _DOUBLE_QUOTE}
# -----------------------------------------------------------------------------
# Main Processing Function
def nerd_dictation_process(text):
    """
    Post-process recognized text and return the result.

    The steps run in this order:

    1. Multi-word substitutions from ``TEXT_REPLACE_REGEX``.
    2. Spoken punctuation: phrases in ``CLOSING_PUNCTUATION`` replace the phrase
       and the space before it; phrases in ``OPENING_PUNCTUATION`` replace the
       phrase and the space after it.
    3. Per-word substitutions: a lookup in ``WORD_REPLACE``; when that leaves
       the word unchanged, the first pattern in ``WORD_REPLACE_REGEX`` that
       alters the word is used.
    4. Words that ended up as empty strings are dropped.

    :param text: The text to process; it is split into words on single spaces.
    :return: The processed words joined with single spaces.
    """
    for pattern, replacement in TEXT_REPLACE_REGEX:
        text = pattern.sub(replacement, text)

    # A punctuation phrase must not run into a longer word, otherwise " comma"
    # would also match the start of " command" and produce ",nd".
    # A function is passed as the replacement so the symbol is inserted
    # literally (backslashes in a replacement string would be interpreted).
    for phrase, symbol in CLOSING_PUNCTUATION.items():
        text = re.sub(
            " " + re.escape(phrase) + r"(?!\w)",
            lambda _match, symbol=symbol: symbol,
            text,
        )

    for phrase, symbol in OPENING_PUNCTUATION.items():
        text = re.sub(
            r"(?<!\w)" + re.escape(phrase) + " ",
            lambda _match, symbol=symbol: symbol,
            text,
        )

    words = []
    for word in text.split(" "):
        replaced = WORD_REPLACE.get(word)
        if replaced is None or replaced == word:
            # The dictionary did not change the word: fall back to the regex
            # table and stop at the first pattern that changes it.
            replaced = word
            for pattern, replacement in WORD_REPLACE_REGEX:
                candidate = pattern.sub(replacement, word)
                if candidate != word:
                    replaced = candidate
                    break
        # Strip any words that were replaced with empty strings.
        if replaced:
            words.append(replaced)

    return " ".join(words)