Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement format_speak_tags from neon_utils #36

Merged
merged 3 commits into from
Mar 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions ovos_plugin_manager/templates/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,45 @@ def remove_ssml(text):
"""
return re.sub('<[^>]*>', '', text).replace(' ', ' ')

@staticmethod
def format_speak_tags(sentence: str, include_tags: bool = True) -> str:
    """
    Normalize the 'speak' SSML wrapper of a sentence for speech synthesis.

    Missing opening/closing 'speak' tags are assumed to sit at the very
    start/end of the sentence, and any text outside the first '<speak>'
    ... '</speak>' region is discarded.

    Args:
        sentence: Input sentence to be spoken
        include_tags: Flag to include <speak> tags in returned string
    Returns:
        Cleaned sentence to pass to TTS; an empty string when nothing
        remains inside the speak tags.
    """
    open_tag = "<speak>"
    close_tag = "</speak>"
    has_open = open_tag in sentence
    has_close = close_tag in sentence

    if not has_open and not has_close:
        # No speak tags at all: wrap the whole sentence
        to_speak = f"{open_tag}{sentence}{close_tag}"
    elif not has_open:
        # Assume speak starts at the beginning of the sentence
        to_speak = f"{open_tag}{sentence}"
    elif not has_close:
        # Assume speak ends at the end of the sentence
        to_speak = f"{sentence}{close_tag}"
    else:
        to_speak = sentence

    # Trim text outside of speak tags. Both tags are guaranteed to be
    # present at this point, so the splits below cannot fail.
    if not to_speak.startswith(open_tag):
        to_speak = f"{open_tag}{to_speak.split(open_tag, 1)[1]}"

    if not to_speak.endswith(close_tag):
        to_speak = f"{to_speak.split(close_tag, 1)[0]}{close_tag}"

    if to_speak == f"{open_tag}{close_tag}":
        return ""

    if include_tags:
        return to_speak

    # Slice the exact tags off. str.lstrip/str.rstrip take a *set* of
    # characters, so they would also eat leading/trailing letters of the
    # spoken text itself (e.g. "speak up" -> " up").
    return to_speak[len(open_tag):-len(close_tag)]

def validate_ssml(self, utterance):
"""Check if engine supports ssml, if not remove all tags.

Expand All @@ -503,6 +542,14 @@ def validate_ssml(self, utterance):
Returns:
str: validated_sentence
"""

# Validate speak tags
if not self.ssml_tags or "speak" not in self.ssml_tags:
self.format_speak_tags(utterance, False)
elif self.ssml_tags and "speak" in self.ssml_tags:
self.format_speak_tags(utterance)


# if ssml is not supported by TTS engine remove all tags
if not self.ssml_tags:
return self.remove_ssml(utterance)
Expand Down
12 changes: 0 additions & 12 deletions test/unittests/test_something.py

This file was deleted.

120 changes: 120 additions & 0 deletions test/unittests/test_ssml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Unit tests for SSML validation and <speak> tag formatting in the TTS template.
import unittest
from ovos_plugin_manager.templates.tts import TTS
from ovos_utils.messagebus import FakeBus


class TestSSML(unittest.TestCase):
    """Tests for SSML validation and <speak> tag formatting on TTS."""

    # Inputs shared by the format_speak_tags tests; every one of them
    # is expected to reduce to the phrase "Speak This.".
    _SPEAK_INPUTS = (
        "Speak This.",
        "<speak>Speak This.",
        "Nope.<speak>Speak This.",
        "Speak This.</speak>",
        "Speak This.</speak> But not this.",
        "<speak>Speak This.</speak>",
        "Don't<speak>Speak This.</speak>But Not this.",
    )

    @classmethod
    def setUpClass(cls):
        # Dummy engine; it is not attached to a message bus here.
        cls.tts = TTS()

    def test_ssml(self):
        """validate_ssml keeps supported tags and drops everything else."""
        sentence = (
            "<speak>Prosody can be used to change the way words "
            "sound. The following words are "
            "<prosody volume='x-loud'> "
            "quite a bit louder than the rest of this passage. "
            "</prosody> Each morning when I wake up, "
            "<prosody rate='x-slow'>I speak quite slowly and "
            "deliberately until I have my coffee.</prosody> I can "
            "also change the pitch of my voice using prosody. "
            "Do you like <prosody pitch='+5%'> speech with a pitch "
            "that is higher, </prosody> or <prosody pitch='-10%'> "
            "is a lower pitch preferable?</prosody></speak>"
        )
        sentence_no_ssml = (
            "Prosody can be used to change the way "
            "words sound. The following words are quite "
            "a bit louder than the rest of this passage. "
            "Each morning when I wake up, I speak quite "
            "slowly and deliberately until I have my "
            "coffee. I can also change the pitch of my "
            "voice using prosody. Do you like speech "
            "with a pitch that is higher, or is "
            "a lower pitch preferable?"
        )
        sentence_bad_ssml = (
            "<foo_invalid>" + sentence + "</foo_invalid end=whatever>"
        )
        sentence_extra_ssml = "<whispered>whisper tts<\\whispered>"

        # Fresh engine: ssml_tags is mutated below, so the shared
        # class-level instance is deliberately not used.
        engine = TTS()

        # test valid ssml
        engine.ssml_tags = ['speak', 'prosody']
        self.assertEqual(engine.validate_ssml(sentence), sentence)

        # test extra ssml
        engine.ssml_tags = ['whispered']
        self.assertEqual(
            engine.validate_ssml(sentence_extra_ssml), sentence_extra_ssml
        )

        # test unsupported extra ssml
        engine.ssml_tags = ['speak', 'prosody']
        self.assertEqual(
            engine.validate_ssml(sentence_extra_ssml), "whisper tts"
        )

        # test mixed valid / invalid ssml
        engine.ssml_tags = ['speak', 'prosody']
        self.assertEqual(engine.validate_ssml(sentence_bad_ssml), sentence)

        # test unsupported ssml
        engine.ssml_tags = []
        self.assertEqual(engine.validate_ssml(sentence), sentence_no_ssml)
        self.assertEqual(
            engine.validate_ssml(sentence_bad_ssml), sentence_no_ssml
        )
        self.assertEqual(TTS.remove_ssml(sentence), sentence_no_ssml)

    def test_format_speak_tags_with_speech(self):
        """Each input is normalized to a single <speak>-wrapped phrase."""
        valid_output = "<speak>Speak This.</speak>"
        for raw in self._SPEAK_INPUTS:
            with self.subTest(sentence=raw):
                self.assertEqual(TTS.format_speak_tags(raw), valid_output)

    def test_format_speak_tags_empty(self):
        """Nothing inside the speak region yields a falsy result."""
        for raw in ("</speak>hello.", "hello.<speak>"):
            with self.subTest(sentence=raw):
                self.assertFalse(TTS.format_speak_tags(raw))

    def test_format_speak_tags_with_speech_no_tags(self):
        """With include_tags=False only the bare phrase is returned."""
        valid_output = "Speak This."
        for raw in self._SPEAK_INPUTS:
            with self.subTest(sentence=raw):
                self.assertEqual(
                    TTS.format_speak_tags(raw, False), valid_output
                )