Merge pull request #2249 from yt605155624/fix_mix_cli

[tts][cli]update mix tts
PaddlePaddle · Aug 15, 2022 · ac38505 · ac38505
2 parents ed18b08 + 5d515f3
commit ac38505
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 5 deletions.
diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py
@@ -255,9 +255,9 @@ def _init_from_path(
         else:
             use_pretrained_voc = False
         voc_lang = lang
-        # we must use ljspeech's voc for mix am now!
+        # When the speaker is 174 (csmsc), csmsc's vocoder works better than aishell3's.
         if lang == 'mix':
-            voc_lang = 'en'
+            voc_lang = 'zh'
         voc_tag = voc + '-' + voc_lang
         self.task_resource.set_task_model(
             model_tag=voc_tag,

diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py
@@ -672,6 +672,22 @@
             'speaker_dict':
             'speaker_id_map.txt',
         },
+        '2.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_0.2.0.zip',
+            'md5':
+            '1d938e104e972386c8bfcbcc98a91587',
+            'config':
+            'default.yaml',
+            'ckpt':
+            'snapshot_iter_99200.pdz',
+            'speech_stats':
+            'speech_stats.npy',
+            'phones_dict':
+            'phone_id_map.txt',
+            'speaker_dict':
+            'speaker_id_map.txt',
+        },
     },
     # tacotron2
     "tacotron2_csmsc-zh": {

diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh
@@ -56,9 +56,13 @@ paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input
 # mix tts
 # The `am` must be `fastspeech2_mix`!
 # The `lang` must be `mix`!
-# The voc must be `hifigan_ljspeech` or `pwgan_ljspeech` for f`astspeech2_mix` now!
-paddlespeech tts --am fastspeech2_mix --voc hifigan_ljspeech --lang mix  --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 0  --output mix_spk0.wav
-paddlespeech tts --am fastspeech2_mix --voc pwgan_ljspeech --lang mix  --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 1  --output mix_spk1.wav
+# The voc must be a Chinese dataset's vocoder for now!
+# spk 174 is csmsc, spk 175 is ljspeech
+paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174.wav
+paddlespeech tts --am fastspeech2_mix --voc hifigan_aishell3 --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174_aishell3.wav
+paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav
+paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav
+
 
 # Speech Translation (only support linux)
 paddlespeech st --input ./en.wav