[TTS] expand to support flexible dictionary entry formats in IPAG2P. (NVIDIA#5318)

* expand to support flexible dictionary entry formats in IPAG2P.
* removed unused imports in tests.collections.tts
* removed unused imports in nemo.collections.tts.modules
* removed unused imports in nemo_text_processing.text_normalization.zh
* updated unit tests with new cases
* renamed test functions because we only test IPAG2P rather than all classes in modules.py.
* revised the current test dict to use a single space between word and pronunciation.

Signed-off-by: Xuesong Yang <[email protected]>
Signed-off-by: andrusenkoau <[email protected]>
XuesongYang authored and andrusenkoau committed Jan 5, 2023
1 parent 1be8732 commit 580ce4a
Showing 9 changed files with 50 additions and 38 deletions.
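
As context for the diffs below, here is a minimal, self-contained sketch of the flexible dictionary parsing described in the commit message. The helper name parse_ipa_dict is hypothetical; the actual logic this commit adds lives inside IPAG2P.__init__ in nemo_text_processing/g2p/modules.py (see that file's diff).

import re
from collections import defaultdict

# Illustrative sketch only: mirrors the parsing this commit adds to IPAG2P.__init__.
# The function name parse_ipa_dict is hypothetical and not part of the commit.
_alt_re = re.compile(r"\([0-9]+\)")  # variant suffixes such as "(1)" in "LEAD(1)"

def parse_ipa_dict(path):
    """Parse a phoneme dictionary whose entries may be "WORD pron" or "WORD p r o n"."""
    phoneme_dict_obj = defaultdict(list)
    with open(path, "r") as fdict:
        for line in fdict:
            if len(line) and ('A' <= line[0] <= 'Z' or line[0] == "'"):
                parts = line.strip().split(maxsplit=1)      # split the word from the pronunciation
                word = re.sub(_alt_re, "", parts[0])        # drop the "(N)" variant suffix from the word
                prons = re.sub(r"\s+", "", parts[1])        # remove any spaces inside the pronunciation
                phoneme_dict_obj[word].append(list(prons))  # store the pronunciation as a list of IPA symbols
    return phoneme_dict_obj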
nemo/collections/tts/modules/common.py (3 changes: 1 addition & 2 deletions)
@@ -14,8 +14,7 @@

###############################################################################

-import ast
-from typing import List, Optional, Tuple
+from typing import Optional, Tuple

import numpy as np
import torch
nemo/collections/tts/modules/radtts.py (2 changes: 0 additions & 2 deletions)
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import pdb

###############################################################################
import torch
@@ -32,7 +31,6 @@
LinearNorm,
get_mask_from_lengths,
getRadTTSEncoder,
-sort_tensor,
)
from nemo.collections.tts.modules.submodules import PartialConv1d
from nemo.core.classes import Exportable, NeuralModule
nemo/collections/tts/modules/submodules.py (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-from typing import List, Optional, Tuple
+from typing import Optional, Tuple

import torch
from torch.autograd import Variable
nemo_text_processing/g2p/modules.py (10 changes: 5 additions & 5 deletions)
@@ -321,14 +321,14 @@ def __init__(
# load the dictionary file where there may exist a digit suffix after a word, which
# represents the pronunciation variant of that word.
phoneme_dict_obj = defaultdict(list)
-_alt_re = re.compile(r'\([0-9]+\)')
+_alt_re = re.compile(r"\([0-9]+\)")
with open(phoneme_dict, "r") as fdict:
for line in fdict:
if len(line) and ('A' <= line[0] <= 'Z' or line[0] == "'"):
-parts = line.strip().split(" ")
-assert len(parts) == 2, f"Wrong format for the entry: {line.strip()}."
-word = re.sub(_alt_re, '', parts[0])
-phoneme_dict_obj[word].append(list(parts[1]))
+parts = line.strip().split(maxsplit=1)
+word = re.sub(_alt_re, "", parts[0])
+prons = re.sub(r"\s+", "", parts[1])
+phoneme_dict_obj[word].append(list(prons))
else:
# Load phoneme_dict as dictionary object
logging.info("Loading phoneme_dict as a Dict object.")
@@ -14,7 +14,7 @@
import os

import pynini
-from nemo_text_processing.text_normalization.zh.graph_utils import NEMO_SIGMA, GraphFst
+from nemo_text_processing.text_normalization.zh.graph_utils import GraphFst
from nemo_text_processing.text_normalization.zh.taggers.cardinal import Cardinal
from nemo_text_processing.text_normalization.zh.taggers.char import Char
from nemo_text_processing.text_normalization.zh.taggers.date import Date
tests/collections/tts/test_torch_tts.py (1 change: 0 additions & 1 deletion)
@@ -16,7 +16,6 @@
import os
from pathlib import Path

-import numpy as np
import pytest
import torch
from nemo_text_processing.g2p.modules import EnglishG2p
tests/collections/tts/test_waveglow.py (3 changes: 0 additions & 3 deletions)
@@ -14,15 +14,12 @@

import os
import tempfile
-from unittest import TestCase

-import onnx
import pytest
import torch
from omegaconf import DictConfig

from nemo.collections.tts.models import WaveGlowModel
-from nemo.collections.tts.modules import WaveGlowModule
from nemo.core.classes import typecheck

mcfg = DictConfig(
tests/nemo_text_processing/g2p/phoneme_dict/test_dict.txt (5 changes: 4 additions & 1 deletion)
@@ -1,2 +1,5 @@
HELLO həˈɫoʊ
-WORLD ˈwɝɫd
+WORLD ˈwɝɫd
+LEAD ˈlɛd
+LEAD(1) ˈ l i d
+NVIDIA ɛ n ˈ v ɪ d i ə
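
Applied to the updated test_dict.txt above, the parser sketched after the change summary would merge these entries as the new unit tests below expect; this usage snippet is hypothetical and not part of the commit.

entries = parse_ipa_dict("tests/nemo_text_processing/g2p/phoneme_dict/test_dict.txt")

assert entries["HELLO"] == [list("həˈɫoʊ")]
assert entries["LEAD"] == [list("ˈlɛd"), list("ˈlid")]   # "LEAD" and "LEAD(1)" merge under one key
assert entries["NVIDIA"] == [list("ɛnˈvɪdiə")]           # spaces inside the pronunciation are removed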
tests/nemo_text_processing/g2p/test_modules.py (60 changes: 38 additions & 22 deletions)
@@ -18,7 +18,7 @@
from nemo_text_processing.g2p.modules import IPAG2P


-class TestModules:
+class TestIPAG2P:

PHONEME_DICT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "phoneme_dict")
PHONEME_DICT_PATH_DE = os.path.join(PHONEME_DICT_DIR, "test_dict_de.txt")
@@ -45,43 +45,59 @@ def _create_g2p(

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_parse_dict(self):
+def test_normalize_dict_with_phonemes(self):
# fmt: off
expected_symbols = {
-'h', 'ə', 'ˈ', 'ɫ', 'o', 'ʊ', 'ˈ',
-'w', 'ɝ', 'ɫ', 'd'
+'h', 'ə', 'ˈ', 'ɫ', 'o', 'ʊ',
+'ˈ', 'w', 'ɝ', 'ɫ', 'd',
+'ˈ', 'l', 'ɛ', 'd',
+'ˈ', 'l', 'i', 'd',
+'ɛ', 'n', 'ˈ', 'v', 'ɪ', 'd', 'i', 'ə'
}
# fmt: on
g2p = self._create_g2p()

assert expected_symbols == g2p.symbols
assert len(g2p.phoneme_dict["HELLO"]) == 1
assert len(g2p.phoneme_dict["WORLD"]) == 1
assert g2p.phoneme_dict["HELLO"][0] == [char for char in "həˈɫoʊ"]
assert g2p.phoneme_dict["WORLD"][0] == [char for char in "ˈwɝɫd"]
assert len(g2p.phoneme_dict["LEAD"]) == 2
assert len(g2p.phoneme_dict["NVIDIA"]) == 1
assert g2p.phoneme_dict["HELLO"][0] == list("həˈɫoʊ")
assert g2p.phoneme_dict["WORLD"][0] == list("ˈwɝɫd")
assert g2p.phoneme_dict["LEAD"] == [list("ˈlɛd"), list("ˈlid")]
assert g2p.phoneme_dict["NVIDIA"][0] == list("ɛnˈvɪdiə")

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_parse_dict_with_chars(self):
+def test_normalize_dict_with_graphemes_and_phonemes(self):
# fmt: off
expected_symbols = {
'H', 'E', 'L', 'L', 'O',
'W', 'O', 'R', 'L', 'D',
+'L', 'E', 'A', 'D',
+'N', 'V', 'I', 'D', 'I', 'A',
'h', 'ə', 'ˈ', 'ɫ', 'o', 'ʊ',
-'ˈ', 'w', 'ɝ', 'ɫ', 'd'
+'ˈ', 'w', 'ɝ', 'ɫ', 'd',
+'ˈ', 'l', 'ɛ', 'd',
+'ˈ', 'l', 'i', 'd',
+'ɛ', 'n', 'ˈ', 'v', 'ɪ', 'd', 'i', 'ə'
}
# fmt: on
g2p = self._create_g2p(use_chars=True)

assert expected_symbols == g2p.symbols
assert len(g2p.phoneme_dict["HELLO"]) == 1
assert len(g2p.phoneme_dict["WORLD"]) == 1
assert g2p.phoneme_dict["HELLO"][0] == [char for char in "həˈɫoʊ"]
assert g2p.phoneme_dict["WORLD"][0] == [char for char in "ˈwɝɫd"]
assert len(g2p.phoneme_dict["LEAD"]) == 2
assert len(g2p.phoneme_dict["NVIDIA"]) == 1
assert g2p.phoneme_dict["HELLO"][0] == list("həˈɫoʊ")
assert g2p.phoneme_dict["WORLD"][0] == list("ˈwɝɫd")
assert g2p.phoneme_dict["LEAD"] == [list("ˈlɛd"), list("ˈlid")]
assert g2p.phoneme_dict["NVIDIA"][0] == list("ɛnˈvɪdiə")

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p(self):
+def test_forward_call(self):
input_text = "Hello world."
expected_output = [char for char in "həˈɫoʊ ˈwɝɫd."]
g2p = self._create_g2p()
@@ -91,11 +107,11 @@ def test_ipa_g2p(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_with_dict_input(self):
+def test_forward_call_with_file_or_object_dict_type(self):
input_text = "Hello world."
expected_output = [char for char in "həˈɫoʊ ˈwɝɫd."]

-phoneme_dict = {"HELLO": ["həˈɫoʊ"], "WORLD": ["ˈwɝɫd"]}
+phoneme_dict = {"HELLO": ["həˈɫoʊ"], "WORLD": ["ˈwɝɫd"], "LEAD": ["ˈlɛd", "ˈlid"], "NVIDIA": ["ɛnˈvɪdiə"]}

g2p_file = self._create_g2p()
g2p_dict = self._create_g2p(phoneme_dict=phoneme_dict)
@@ -107,7 +123,7 @@ def test_ipa_g2p_with_dict_input(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_with_oov(self):
+def test_forward_call_with_oov_word(self):
input_text = "Hello Kitty!"
expected_output = [char for char in "həˈɫoʊ KITTY!"]
g2p = self._create_g2p()
@@ -117,7 +133,7 @@ def test_ipa_g2p_with_oov(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_with_oov_func(self):
+def test_forward_call_with_oov_func(self):
input_text = "Hello Kitty!"
expected_output = [char for char in "həˈɫoʊ test!"]
g2p = self._create_g2p(apply_to_oov_word=lambda x: "test")
@@ -127,7 +143,7 @@ def test_ipa_g2p_with_oov_func(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_graphemes(self):
+def test_forward_call_with_graphemes_uppercase(self):
input_text = "Hello world."
expected_output = [char for char in input_text.upper()]
g2p = self._create_g2p(use_chars=True, phoneme_probability=0.0)
@@ -137,7 +153,7 @@ def test_ipa_g2p_graphemes(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_graphemes_lower(self):
+def test_forward_call_with_graphemes_lowercase(self):
input_text = "Hello world."
expected_output = [char for char in input_text.lower()]
g2p = self._create_g2p(use_chars=True, phoneme_probability=0.0, set_graphemes_upper=False)
@@ -147,7 +163,7 @@ def test_ipa_g2p_graphemes_lower(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_with_escaped_characters(self):
+def test_forward_call_with_escaped_characters(self):
input_text = "Hello |wo rld|."
expected_output = ["h", "ə", "ˈ", "ɫ", "o", "ʊ", " ", "wo", "rld", "."]
g2p = self._create_g2p()
@@ -157,13 +173,13 @@ def test_ipa_g2p_with_escaped_characters(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_unsupported_locale(self):
+def test_instantiate_unsupported_locale(self):
with pytest.raises(ValueError, match="Unsupported locale"):
self._create_g2p(locale="en-USA")

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_de_de(self):
+def test_forward_call_de_de(self):
input_text = "Hallo „welt“!"
expected_output = [char for char in "hˈaloː „vˈɛlt“!"]
g2p = self._create_g2p(phoneme_dict=self.PHONEME_DICT_PATH_DE, locale="de-DE")
@@ -173,7 +189,7 @@ def test_ipa_g2p_de_de(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_en_us(self):
+def test_forward_call_en_us(self):
input_text = "Hello Kitty!"
expected_output = [char for char in "həˈɫoʊ KITTY!"]
g2p = self._create_g2p(locale="en-US")
@@ -183,7 +199,7 @@ def test_ipa_g2p_en_us(self):

@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
-def test_ipa_g2p_es_es(self):
+def test_forward_call_es_es(self):
input_text = "¿Hola mundo, amigo?"
expected_output = [char for char in "¿ˈola mˈundo, AMIGO?"]
g2p = self._create_g2p(phoneme_dict=self.PHONEME_DICT_PATH_ES, locale="es-ES")
