Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added 'and' between dollars and cents. Also added prosody rate support #51

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
7 changes: 7 additions & 0 deletions gruut/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ class WordNode(Node):
text_with_ws: str = ""
interpret_as: typing.Union[str, InterpretAs] = ""
format: typing.Union[str, InterpretAsFormat] = ""
rate: str = ""

number: typing.Optional[Decimal] = None
date: typing.Optional[datetime] = None
Expand Down Expand Up @@ -403,6 +404,9 @@ class Word:
voice: str = ""
"""Voice (from SSML)"""

rate: str = ""
"""Prosody rate from SSML"""

pos: typing.Optional[str] = None
"""Part of speech (None if not set)"""

Expand Down Expand Up @@ -471,6 +475,9 @@ class Sentence:
voice: str = ""
"""Voice (from SSML)"""

rate: str = ""
"""Prosody rate (from SSML)"""

words: typing.List[Word] = field(default_factory=list)
"""Words in the sentence"""

Expand Down
39 changes: 37 additions & 2 deletions gruut/text_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def get_lang(lang: str) -> str:
pos=word_node.pos if pos else None,
lang=get_lang(node.lang),
voice=node.voice,
rate=node.rate,
pause_before_ms=word_pause_before_ms,
marks_before=(
word_marks_before if word_marks_before else None
Expand Down Expand Up @@ -368,6 +369,27 @@ def get_lang(lang: str) -> str:
w.text for w in sentence.words if w.is_spoken
)

# Normalize rate
sent_rate = sentence.rate

# Get rate used across all words
for word in sentence.words:
if word.rate:
if sent_rate and (sent_rate != word.rate):
# Multiple rates
sent_rate = ""
break

sent_rate = word.rate

if sent_rate:
sentence.rate = sent_rate

# Set rate on all words
for word in sentence.words:
word.rate = sent_rate


# Normalize voice
sent_voice = sentence.voice

Expand Down Expand Up @@ -515,6 +537,9 @@ def iter_elements():
# [voice]
voice_stack: typing.List[str] = []

# [rate]
prosody_stack: typing.List[str] = []

# [(interpret_as, format)]
say_as_stack: typing.List[typing.Tuple[str, str]] = []

Expand Down Expand Up @@ -557,6 +582,9 @@ def scope_kwargs(target_class):
if say_as_stack:
scope["interpret_as"], scope["format"] = say_as_stack[-1]

if prosody_stack:
scope["rate"] = prosody_stack[-1]

if word_role is not None:
scope["role"] = word_role

Expand Down Expand Up @@ -707,6 +735,9 @@ def in_inline_lexicon(
elif end_tag == "say-as":
if say_as_stack:
say_as_stack.pop()
elif end_tag == "prosody":
if prosody_stack:
prosody_stack.pop()
elif end_tag == "lookup":
if lookup_stack:
lookup_stack.pop()
Expand Down Expand Up @@ -920,6 +951,9 @@ def in_inline_lexicon(
attrib_no_namespace(elem, "format", ""),
)
)
elif elem_tag == "prosody":
prosody_rate = attrib_no_namespace(elem, "rate", "1")
prosody_stack.append(prosody_rate)
elif elem_tag == "sub":
# Sub
last_alias = attrib_no_namespace(elem, "alias", "")
Expand Down Expand Up @@ -2391,8 +2425,9 @@ def _verbalize_currency(

# Post-process currency words
if num_has_frac:
# Discard num2words separator
num_str = num_str.replace("|", "")
# Replace the num2words separator with the word "and"
num_str = num_str.replace("|", " and")
num_str = num_str.replace(" and ", " ", num_str.count(" and ") - 1) # Only the last "and" is retained
else:
# Remove 'zero cents' part
num_str = num_str.split("|", maxsplit=1)[0]
Expand Down