Skip to content

Commit

Permalink
Add C++ runtime for Matcha-TTS (#1627)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Dec 31, 2024
1 parent 5c2cc48 commit 2c2926a
Show file tree
Hide file tree
Showing 33 changed files with 1,397 additions and 86 deletions.
34 changes: 34 additions & 0 deletions .github/scripts/test-offline-tts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,40 @@ which $EXE
# test waves are saved in ./tts
mkdir ./tts

# Matcha-TTS test section: runs $EXE twice on Chinese input text using the
# files under ./matcha-icefall-zh-baker, saving the results in ./tts.
log "------------------------------------------------------------"
log "matcha-icefall-zh-baker"
log "------------------------------------------------------------"
# Fetch the model archive, unpack it, then delete the archive.
curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

# Fetch the vocoder file that is passed to --matcha-vocoder below.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx

# First run: writes ./tts/matcha-baker-zh-1.wav. The text is single-quoted
# because it contains literal double quotes.
$EXE \
--matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
--matcha-vocoder=./hifigan_v2.onnx \
--matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
--matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
--matcha-dict-dir=./matcha-icefall-zh-baker/dict \
--num-threads=2 \
--debug=1 \
--output-filename=./tts/matcha-baker-zh-1.wav \
'小米的使命是,始终坚持做"感动人心、价格厚道"的好产品,让全球每个人都能享受科技带来的美好生活'

# Second run: same model files and options, different text, writes
# ./tts/matcha-baker-zh-2.wav.
$EXE \
--matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
--matcha-vocoder=./hifigan_v2.onnx \
--matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
--matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
--matcha-dict-dir=./matcha-icefall-zh-baker/dict \
--num-threads=2 \
--debug=1 \
--output-filename=./tts/matcha-baker-zh-2.wav \
"当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。"

# Remove the downloaded vocoder and the unpacked model directory; the
# generated wave files in ./tts are left in place.
rm hifigan_v2.onnx
rm -rf matcha-icefall-zh-baker

log "------------------------------------------------------------"
log "vits-piper-en_US-amy-low"
log "------------------------------------------------------------"
Expand Down
20 changes: 20 additions & 0 deletions .github/scripts/test-python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,26 @@ mkdir ./tts

log "vits-ljs test"

# Matcha-TTS test for the Python example script offline-tts.py.
# NOTE(review): the log line just before this section reads "vits-ljs test",
# although the commands here use the matcha-icefall-zh-baker files; consider
# giving this section its own log header.

# Fetch the model archive, unpack it, then delete the archive.
curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2

# Fetch the vocoder file that is passed to --matcha-vocoder below.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx

# Run the example with the phone/date/number rule FSTs from the model
# directory; the input text contains a date, phone numbers and an amount.
# The result is written to ./tts/test-matcha.wav.
python3 ./python-api-examples/offline-tts.py \
--matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
--matcha-vocoder=./hifigan_v2.onnx \
--matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
--matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
--tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
--matcha-dict-dir=./matcha-icefall-zh-baker/dict \
--output-filename=./tts/test-matcha.wav \
"某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"

# Remove the unpacked model directory and the downloaded vocoder.
rm -rf matcha-icefall-zh-baker
rm hifigan_v2.onnx


curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx
curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt
curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt
Expand Down
30 changes: 17 additions & 13 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,23 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: install/*

- name: Test offline TTS
if: matrix.with_tts == 'ON'
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-tts
.github/scripts/test-offline-tts.sh
du -h -d1 .
- uses: actions/upload-artifact@v4
if: matrix.with_tts == 'ON'
with:
name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: tts

- name: Test offline Moonshine
if: matrix.build_type != 'Debug'
shell: bash
Expand Down Expand Up @@ -309,16 +326,7 @@ jobs:
.github/scripts/test-offline-whisper.sh
du -h -d1 .
- name: Test offline TTS
if: matrix.with_tts == 'ON'
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-tts
.github/scripts/test-offline-tts.sh
du -h -d1 .
- name: Test online paraformer
shell: bash
Expand Down Expand Up @@ -367,8 +375,4 @@ jobs:
overwrite: true
file: sherpa-onnx-*.tar.bz2

- uses: actions/upload-artifact@v4
with:
name: tts-generated-test-files-${{ matrix.build_type }}-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: tts

18 changes: 9 additions & 9 deletions .github/workflows/macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test offline TTS
if: matrix.with_tts == 'ON'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-tts
.github/scripts/test-offline-tts.sh
- name: Test offline Moonshine
if: matrix.build_type != 'Debug'
shell: bash
Expand Down Expand Up @@ -226,15 +235,6 @@ jobs:
.github/scripts/test-kws.sh
- name: Test offline TTS
if: matrix.with_tts == 'ON'
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-tts
.github/scripts/test-offline-tts.sh
- name: Test online paraformer
shell: bash
run: |
Expand Down
93 changes: 85 additions & 8 deletions python-api-examples/offline-tts-play.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Usage:
Example (1/3)
Example (1/4)
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
Expand All @@ -23,7 +23,7 @@
--output-filename=./generated.wav \
"Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
Example (2/3)
Example (2/4)
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
Expand All @@ -37,7 +37,7 @@
--output-filename=./liubei-21.wav \
"勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334"
Example (3/3)
Example (3/4)
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
Expand All @@ -53,6 +53,24 @@
--output-filename=./test-2.wav \
"当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。"
Example (4/4)
curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
python3 ./python-api-examples/offline-tts-play.py \
--matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
--matcha-vocoder=./hifigan_v2.onnx \
--matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
--matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
--tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
--matcha-dict-dir=./matcha-icefall-zh-baker/dict \
--output-filename=./test-matcha.wav \
"某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
You can find more models at
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
Expand Down Expand Up @@ -84,14 +102,11 @@
sys.exit(-1)


def get_args():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

def add_vits_args(parser):
parser.add_argument(
"--vits-model",
type=str,
default="",
help="Path to vits model.onnx",
)

Expand Down Expand Up @@ -124,6 +139,60 @@ def get_args():
help="Path to the dict directory for models using jieba",
)


def add_matcha_args(parser):
    """Register the ``--matcha-*`` command-line options on ``parser``.

    Each option accepts a string and defaults to the empty string.

    Args:
      parser: The ``argparse`` parser (or argument group) to extend in place.
    """
    # (flag, help text) pairs, listed in the order they are registered.
    matcha_options = (
        ("--matcha-acoustic-model", "Path to model.onnx for matcha"),
        ("--matcha-vocoder", "Path to vocoder for matcha"),
        ("--matcha-lexicon", "Path to lexicon.txt for matcha"),
        ("--matcha-tokens", "Path to tokens.txt for matcha"),
        (
            "--matcha-data-dir",
            "Path to the dict directory of espeak-ng. If it is specified,\n"
            "--matcha-lexicon and --matcha-tokens are ignored",
        ),
        (
            "--matcha-dict-dir",
            "Path to the dict directory for models using jieba",
        ),
    )

    for flag, description in matcha_options:
        parser.add_argument(flag, type=str, default="", help=description)


def get_args():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

add_vits_args(parser)
add_matcha_args(parser)

parser.add_argument(
"--tts-rule-fsts",
type=str,
Expand Down Expand Up @@ -313,6 +382,14 @@ def main():
dict_dir=args.vits_dict_dir,
tokens=args.vits_tokens,
),
matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
acoustic_model=args.matcha_acoustic_model,
vocoder=args.matcha_vocoder,
lexicon=args.matcha_lexicon,
tokens=args.matcha_tokens,
data_dir=args.matcha_data_dir,
dict_dir=args.matcha_dict_dir,
),
provider=args.provider,
debug=args.debug,
num_threads=args.num_threads,
Expand Down
Loading

0 comments on commit 2c2926a

Please sign in to comment.