diff --git a/nemo/collections/tts/modules/common.py b/nemo/collections/tts/modules/common.py index 7eff0c4c3baf..4c566789c563 100644 --- a/nemo/collections/tts/modules/common.py +++ b/nemo/collections/tts/modules/common.py @@ -14,8 +14,7 @@ ############################################################################### -import ast -from typing import List, Optional, Tuple +from typing import Optional, Tuple import numpy as np import torch diff --git a/nemo/collections/tts/modules/radtts.py b/nemo/collections/tts/modules/radtts.py index 83bbcda58230..8de98dc4d1fc 100644 --- a/nemo/collections/tts/modules/radtts.py +++ b/nemo/collections/tts/modules/radtts.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pdb ############################################################################### import torch @@ -32,7 +31,6 @@ LinearNorm, get_mask_from_lengths, getRadTTSEncoder, - sort_tensor, ) from nemo.collections.tts.modules.submodules import PartialConv1d from nemo.core.classes import Exportable, NeuralModule diff --git a/nemo/collections/tts/modules/submodules.py b/nemo/collections/tts/modules/submodules.py index 90dd822e1650..b7574ed9ddf4 100644 --- a/nemo/collections/tts/modules/submodules.py +++ b/nemo/collections/tts/modules/submodules.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Optional, Tuple +from typing import Optional, Tuple import torch from torch.autograd import Variable diff --git a/nemo_text_processing/g2p/modules.py b/nemo_text_processing/g2p/modules.py index 924fa9f3c716..b9e6c5b51514 100644 --- a/nemo_text_processing/g2p/modules.py +++ b/nemo_text_processing/g2p/modules.py @@ -321,14 +321,14 @@ def __init__( # load the dictionary file where there may exist a digit suffix after a word, which # represents the pronunciation variant of that word. phoneme_dict_obj = defaultdict(list) - _alt_re = re.compile(r'\([0-9]+\)') + _alt_re = re.compile(r"\([0-9]+\)") with open(phoneme_dict, "r") as fdict: for line in fdict: if len(line) and ('A' <= line[0] <= 'Z' or line[0] == "'"): - parts = line.strip().split(" ") - assert len(parts) == 2, f"Wrong format for the entry: {line.strip()}." - word = re.sub(_alt_re, '', parts[0]) - phoneme_dict_obj[word].append(list(parts[1])) + parts = line.strip().split(maxsplit=1) + word = re.sub(_alt_re, "", parts[0]) + prons = re.sub(r"\s+", "", parts[1]) + phoneme_dict_obj[word].append(list(prons)) else: # Load phoneme_dict as dictionary object logging.info("Loading phoneme_dict as a Dict object.") diff --git a/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py index 1c51c44dd1ef..2d3c13e0acbb 100644 --- a/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py @@ -14,7 +14,7 @@ import os import pynini -from nemo_text_processing.text_normalization.zh.graph_utils import NEMO_SIGMA, GraphFst +from nemo_text_processing.text_normalization.zh.graph_utils import GraphFst from nemo_text_processing.text_normalization.zh.taggers.cardinal import Cardinal from nemo_text_processing.text_normalization.zh.taggers.char import Char from nemo_text_processing.text_normalization.zh.taggers.date import Date diff --git a/tests/collections/tts/test_torch_tts.py b/tests/collections/tts/test_torch_tts.py index 7054b016b332..f99ebda8880d 100644 --- a/tests/collections/tts/test_torch_tts.py +++ b/tests/collections/tts/test_torch_tts.py @@ -16,7 +16,6 @@ import os from pathlib import Path -import numpy as np import pytest import torch from nemo_text_processing.g2p.modules import EnglishG2p diff --git a/tests/collections/tts/test_waveglow.py b/tests/collections/tts/test_waveglow.py index ff873e540e84..889fc9c58616 100644 --- a/tests/collections/tts/test_waveglow.py +++ b/tests/collections/tts/test_waveglow.py @@ -14,15 +14,12 @@ import os import tempfile -from unittest import TestCase -import onnx import pytest import torch from omegaconf import DictConfig from nemo.collections.tts.models import WaveGlowModel -from nemo.collections.tts.modules import WaveGlowModule from nemo.core.classes import typecheck mcfg = DictConfig( diff --git a/tests/nemo_text_processing/g2p/phoneme_dict/test_dict.txt b/tests/nemo_text_processing/g2p/phoneme_dict/test_dict.txt index 664b79c5428c..455c8e02e49b 100644 --- a/tests/nemo_text_processing/g2p/phoneme_dict/test_dict.txt +++ b/tests/nemo_text_processing/g2p/phoneme_dict/test_dict.txt @@ -1,2 +1,5 @@ HELLO həˈɫoʊ -WORLD ˈwɝɫd +WORLD ˈwɝɫd +LEAD ˈlɛd +LEAD(1) ˈ l i d +NVIDIA ɛ n ˈ v ɪ d i ə diff --git a/tests/nemo_text_processing/g2p/test_modules.py b/tests/nemo_text_processing/g2p/test_modules.py index 0fff1018de78..d3ab4128855c 100644 --- a/tests/nemo_text_processing/g2p/test_modules.py +++ b/tests/nemo_text_processing/g2p/test_modules.py @@ -18,7 +18,7 @@ from nemo_text_processing.g2p.modules import IPAG2P -class TestModules: +class TestIPAG2P: PHONEME_DICT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "phoneme_dict") PHONEME_DICT_PATH_DE = os.path.join(PHONEME_DICT_DIR, "test_dict_de.txt") @@ -45,11 +45,14 @@ def _create_g2p( @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_parse_dict(self): + def test_normalize_dict_with_phonemes(self): # fmt: off expected_symbols = { - 'h', 'ə', 'ˈ', 'ɫ', 'o', 'ʊ', 'ˈ', - 'w', 'ɝ', 'ɫ', 'd' + 'h', 'ə', 'ˈ', 'ɫ', 'o', 'ʊ', + 'ˈ', 'w', 'ɝ', 'ɫ', 'd', + 'ˈ', 'l', 'ɛ', 'd', + 'ˈ', 'l', 'i', 'd', + 'ɛ', 'n', 'ˈ', 'v', 'ɪ', 'd', 'i', 'ə' } # fmt: on g2p = self._create_g2p() @@ -57,18 +60,27 @@ def test_ipa_g2p_parse_dict(self): assert expected_symbols == g2p.symbols assert len(g2p.phoneme_dict["HELLO"]) == 1 assert len(g2p.phoneme_dict["WORLD"]) == 1 - assert g2p.phoneme_dict["HELLO"][0] == [char for char in "həˈɫoʊ"] - assert g2p.phoneme_dict["WORLD"][0] == [char for char in "ˈwɝɫd"] + assert len(g2p.phoneme_dict["LEAD"]) == 2 + assert len(g2p.phoneme_dict["NVIDIA"]) == 1 + assert g2p.phoneme_dict["HELLO"][0] == list("həˈɫoʊ") + assert g2p.phoneme_dict["WORLD"][0] == list("ˈwɝɫd") + assert g2p.phoneme_dict["LEAD"] == [list("ˈlɛd"), list("ˈlid")] + assert g2p.phoneme_dict["NVIDIA"][0] == list("ɛnˈvɪdiə") @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_parse_dict_with_chars(self): + def test_normalize_dict_with_graphemes_and_phonemes(self): # fmt: off expected_symbols = { 'H', 'E', 'L', 'L', 'O', 'W', 'O', 'R', 'L', 'D', + 'L', 'E', 'A', 'D', + 'N', 'V', 'I', 'D', 'I', 'A', 'h', 'ə', 'ˈ', 'ɫ', 'o', 'ʊ', - 'ˈ', 'w', 'ɝ', 'ɫ', 'd' + 'ˈ', 'w', 'ɝ', 'ɫ', 'd', + 'ˈ', 'l', 'ɛ', 'd', + 'ˈ', 'l', 'i', 'd', + 'ɛ', 'n', 'ˈ', 'v', 'ɪ', 'd', 'i', 'ə' } # fmt: on g2p = self._create_g2p(use_chars=True) @@ -76,12 +88,16 @@ def test_ipa_g2p_parse_dict_with_chars(self): assert expected_symbols == g2p.symbols assert len(g2p.phoneme_dict["HELLO"]) == 1 assert len(g2p.phoneme_dict["WORLD"]) == 1 - assert g2p.phoneme_dict["HELLO"][0] == [char for char in "həˈɫoʊ"] - assert g2p.phoneme_dict["WORLD"][0] == [char for char in "ˈwɝɫd"] + assert len(g2p.phoneme_dict["LEAD"]) == 2 + assert len(g2p.phoneme_dict["NVIDIA"]) == 1 + assert g2p.phoneme_dict["HELLO"][0] == list("həˈɫoʊ") + assert g2p.phoneme_dict["WORLD"][0] == list("ˈwɝɫd") + assert g2p.phoneme_dict["LEAD"] == [list("ˈlɛd"), list("ˈlid")] + assert g2p.phoneme_dict["NVIDIA"][0] == list("ɛnˈvɪdiə") @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p(self): + def test_forward_call(self): input_text = "Hello world." expected_output = [char for char in "həˈɫoʊ ˈwɝɫd."] g2p = self._create_g2p() @@ -91,11 +107,11 @@ def test_ipa_g2p(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_with_dict_input(self): + def test_forward_call_with_file_or_object_dict_type(self): input_text = "Hello world." expected_output = [char for char in "həˈɫoʊ ˈwɝɫd."] - phoneme_dict = {"HELLO": ["həˈɫoʊ"], "WORLD": ["ˈwɝɫd"]} + phoneme_dict = {"HELLO": ["həˈɫoʊ"], "WORLD": ["ˈwɝɫd"], "LEAD": ["ˈlɛd", "ˈlid"], "NVIDIA": ["ɛnˈvɪdiə"]} g2p_file = self._create_g2p() g2p_dict = self._create_g2p(phoneme_dict=phoneme_dict) @@ -107,7 +123,7 @@ def test_ipa_g2p_with_dict_input(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_with_oov(self): + def test_forward_call_with_oov_word(self): input_text = "Hello Kitty!" expected_output = [char for char in "həˈɫoʊ KITTY!"] g2p = self._create_g2p() @@ -117,7 +133,7 @@ def test_ipa_g2p_with_oov(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_with_oov_func(self): + def test_forward_call_with_oov_func(self): input_text = "Hello Kitty!" expected_output = [char for char in "həˈɫoʊ test!"] g2p = self._create_g2p(apply_to_oov_word=lambda x: "test") @@ -127,7 +143,7 @@ def test_ipa_g2p_with_oov_func(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_graphemes(self): + def test_forward_call_with_graphemes_uppercase(self): input_text = "Hello world." expected_output = [char for char in input_text.upper()] g2p = self._create_g2p(use_chars=True, phoneme_probability=0.0) @@ -137,7 +153,7 @@ def test_ipa_g2p_graphemes(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_graphemes_lower(self): + def test_forward_call_with_graphemes_lowercase(self): input_text = "Hello world." expected_output = [char for char in input_text.lower()] g2p = self._create_g2p(use_chars=True, phoneme_probability=0.0, set_graphemes_upper=False) @@ -147,7 +163,7 @@ def test_ipa_g2p_graphemes_lower(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_with_escaped_characters(self): + def test_forward_call_with_escaped_characters(self): input_text = "Hello |wo rld|." expected_output = ["h", "ə", "ˈ", "ɫ", "o", "ʊ", " ", "wo", "rld", "."] g2p = self._create_g2p() @@ -157,13 +173,13 @@ def test_ipa_g2p_with_escaped_characters(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_unsupported_locale(self): + def test_instantiate_unsupported_locale(self): with pytest.raises(ValueError, match="Unsupported locale"): self._create_g2p(locale="en-USA") @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_de_de(self): + def test_forward_call_de_de(self): input_text = "Hallo „welt“!" expected_output = [char for char in "hˈaloː „vˈɛlt“!"] g2p = self._create_g2p(phoneme_dict=self.PHONEME_DICT_PATH_DE, locale="de-DE") @@ -173,7 +189,7 @@ def test_ipa_g2p_de_de(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_en_us(self): + def test_forward_call_en_us(self): input_text = "Hello Kitty!" expected_output = [char for char in "həˈɫoʊ KITTY!"] g2p = self._create_g2p(locale="en-US") @@ -183,7 +199,7 @@ def test_ipa_g2p_en_us(self): @pytest.mark.run_only_on('CPU') @pytest.mark.unit - def test_ipa_g2p_es_es(self): + def test_forward_call_es_es(self): input_text = "¿Hola mundo, amigo?" expected_output = [char for char in "¿ˈola mˈundo, AMIGO?"] g2p = self._create_g2p(phoneme_dict=self.PHONEME_DICT_PATH_ES, locale="es-ES")