Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor TTS Android code to support jieba for Chinese TTS models #800

Merged
merged 4 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,5 @@ sr-data
vits-icefall-*
sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
spoken-language-identification-test-wavs
my-release-key*
vits-zh-hf-fanchen-C
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ class MainActivity : AppCompatActivity() {
var ruleFars: String?
var lexicon: String?
var dataDir: String?
var dictDir: String?
var assets: AssetManager? = application.assets

// The purpose of such a design is to make the CI test easier
Expand All @@ -169,6 +170,7 @@ class MainActivity : AppCompatActivity() {
ruleFars = null
lexicon = null
dataDir = null
dictDir = null

// Example 1:
// modelDir = "vits-vctk"
Expand All @@ -191,21 +193,36 @@ class MainActivity : AppCompatActivity() {
// lexicon = "lexicon.txt"

// Example 4:
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
// modelDir = "vits-zh-hf-fanchen-C"
// modelName = "vits-zh-hf-fanchen-C.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-zh-hf-fanchen-C/dict"

// Example 5:
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
// lang = "deu"

if (dataDir != null) {
val newDir = copyDataDir(modelDir)
val newDir = copyDataDir(modelDir!!)
modelDir = newDir + "/" + modelDir
dataDir = newDir + "/" + dataDir
assets = null
}

if (dictDir != null) {
val newDir = copyDataDir( modelDir!!)
modelDir = newDir + "/" + modelDir
dictDir = modelDir + "/" + "dict"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}

val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: "",
)!!
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ data class OfflineTtsVitsModelConfig(
var lexicon: String = "",
var tokens: String,
var dataDir: String = "",
var dictDir: String = "",
var noiseScale: Float = 0.667f,
var noiseScaleW: Float = 0.8f,
var lengthScale: Float = 1.0f,
Expand Down Expand Up @@ -49,7 +50,7 @@ class OfflineTts(

init {
if (assetManager != null) {
ptr = new(assetManager, config)
ptr = newFromAsset(assetManager, config)
} else {
ptr = newFromFile(config)
}
Expand Down Expand Up @@ -87,7 +88,7 @@ class OfflineTts(
fun allocate(assetManager: AssetManager? = null) {
if (ptr == 0L) {
if (assetManager != null) {
ptr = new(assetManager, config)
ptr = newFromAsset(assetManager, config)
} else {
ptr = newFromFile(config)
}
Expand All @@ -105,7 +106,7 @@ class OfflineTts(
delete(ptr)
}

private external fun new(
private external fun newFromAsset(
assetManager: AssetManager,
config: OfflineTtsConfig,
): Long
Expand Down Expand Up @@ -152,6 +153,7 @@ fun getOfflineTtsConfig(
modelName: String,
lexicon: String,
dataDir: String,
dictDir: String,
ruleFsts: String,
ruleFars: String
): OfflineTtsConfig? {
Expand All @@ -161,7 +163,8 @@ fun getOfflineTtsConfig(
model = "$modelDir/$modelName",
lexicon = "$modelDir/$lexicon",
tokens = "$modelDir/tokens.txt",
dataDir = "$dataDir"
dataDir = dataDir,
dictDir = dictDir,
),
numThreads = 2,
debug = true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ object TtsEngine {
private var ruleFars: String? = null
private var lexicon: String? = null
private var dataDir: String? = null
private var dictDir: String? = null
private var assets: AssetManager? = null

init {
Expand All @@ -54,6 +55,7 @@ object TtsEngine {
ruleFars = null
lexicon = null
dataDir = null
dictDir = null
lang = null

// Please enable one and only one of the examples below
Expand Down Expand Up @@ -83,6 +85,14 @@ object TtsEngine {
// lang = "zho"

// Example 4:
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
// modelDir = "vits-zh-hf-fanchen-C"
// modelName = "vits-zh-hf-fanchen-C.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-zh-hf-fanchen-C/dict"
// lang = "zho"

// Example 5:
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
// This model does not need lexicon or dataDir
// modelDir = "vits-coqui-de-css10"
Expand All @@ -108,9 +118,18 @@ object TtsEngine {
assets = null
}

if (dictDir != null) {
val newDir = copyDataDir(context, modelDir!!)
modelDir = newDir + "/" + modelDir
dictDir = modelDir + "/" + "dict"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}

val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: ""
)!!
Expand Down
2 changes: 1 addition & 1 deletion build-android-arm64-v8a.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ onnxruntime_version=1.17.1
if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then
mkdir -p $onnxruntime_version
pushd $onnxruntime_version
wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
unzip onnxruntime-android-${onnxruntime_version}.zip
rm onnxruntime-android-${onnxruntime_version}.zip
popd
Expand Down
2 changes: 1 addition & 1 deletion build-android-armv7-eabi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1
if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then
mkdir -p $onnxruntime_version
pushd $onnxruntime_version
wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
unzip onnxruntime-android-${onnxruntime_version}.zip
rm onnxruntime-android-${onnxruntime_version}.zip
popd
Expand Down
2 changes: 1 addition & 1 deletion build-android-x86-64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1
if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then
mkdir -p $onnxruntime_version
pushd $onnxruntime_version
wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
unzip onnxruntime-android-${onnxruntime_version}.zip
rm onnxruntime-android-${onnxruntime_version}.zip
popd
Expand Down
2 changes: 1 addition & 1 deletion build-android-x86.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1
if [ ! -f $onnxruntime_version/jni/x86/libonnxruntime.so ]; then
mkdir -p $onnxruntime_version
pushd $onnxruntime_version
wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
unzip onnxruntime-android-${onnxruntime_version}.zip
rm onnxruntime-android-${onnxruntime_version}.zip
popd
Expand Down
5 changes: 5 additions & 0 deletions scripts/apk/build-apk-tts-engine.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt
{% endif %}

{% if tts_model.dict_dir %}
dict_dir={{ tts_model.dict_dir }}
sed -i.bak s%"dictDir = null"%"dictDir = \"$dict_dir\""% ./TtsEngine.kt
{% endif %}

{% if tts_model.data_dir %}
data_dir={{ tts_model.data_dir }}
sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt
Expand Down
5 changes: 5 additions & 0 deletions scripts/apk/build-apk-tts.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt
{% endif %}

{% if tts_model.dict_dir %}
dict_dir={{ tts_model.dict_dir }}
sed -i.bak s%"dictDir = null"%"dictDir = \"$dict_dir\""% ./MainActivity.kt
{% endif %}

{% if tts_model.data_dir %}
data_dir={{ tts_model.data_dir }}
sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt
Expand Down
9 changes: 8 additions & 1 deletion scripts/apk/generate-tts-apk-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class TtsModel:
rule_fsts: Optional[List[str]] = None
rule_fars: Optional[List[str]] = None
data_dir: Optional[str] = None
dict_dir: Optional[str] = None
is_char: bool = False
lang_iso_639_3: str = ""

Expand Down Expand Up @@ -326,8 +327,14 @@ def get_vits_models() -> List[TtsModel]:
rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
for m in chinese_models:
s = [f"{m.model_dir}/{r}" for r in rule_fsts]
if "vits-zh-hf" in m.model_dir:
s = s[:-1]
m.dict_dir = m.model_dir + "/dict"

m.rule_fsts = ",".join(s)
m.rule_fars = f"{m.model_dir}/rule.far"

if "vits-zh-hf" not in m.model_dir:
m.rule_fars = f"{m.model_dir}/rule.far"

all_models = chinese_models + [
TtsModel(
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/audio-tagging-model-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ bool AudioTaggingModelConfig::Validate() const {
}

if (!ced.empty() && !FileExists(ced)) {
SHERPA_ONNX_LOGE("CED model file %s does not exist", ced.c_str());
SHERPA_ONNX_LOGE("CED model file '%s' does not exist", ced.c_str());
return false;
}

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/audio-tagging.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ bool AudioTaggingConfig::Validate() const {
}

if (!FileExists(labels)) {
SHERPA_ONNX_LOGE("--labels %s does not exist", labels.c_str());
SHERPA_ONNX_LOGE("--labels '%s' does not exist", labels.c_str());
return false;
}

Expand Down
4 changes: 2 additions & 2 deletions sherpa-onnx/csrc/file-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include <fstream>
#include <string>

#include "sherpa-onnx/csrc/log.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

Expand All @@ -17,7 +17,7 @@ bool FileExists(const std::string &filename) {

void AssertFileExists(const std::string &filename) {
if (!FileExists(filename)) {
SHERPA_ONNX_LOG(FATAL) << filename << " does not exist!";
SHERPA_ONNX_LOGE("filename '%s' does not exist", filename.c_str());
exit(-1);
}
}
Expand Down
8 changes: 8 additions & 0 deletions sherpa-onnx/csrc/jieba-lexicon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,14 @@ class JiebaLexicon::Impl {
if (token2id_.count(p.first) && !token2id_.count(p.second)) {
token2id_[p.second] = token2id_[p.first];
}

if (!token2id_.count(p.first) && token2id_.count(p.second)) {
token2id_[p.first] = token2id_[p.second];
}
}

if (!token2id_.count("、") && token2id_.count(",")) {
token2id_["、"] = token2id_[","];
}
}

Expand Down
3 changes: 2 additions & 1 deletion sherpa-onnx/csrc/keyword-spotter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ bool KeywordSpotterConfig::Validate() const {
// Solution: the keywords_file variable is parsed
// directly as a string of keywords
if (!std::ifstream(keywords_file.c_str()).good()) {
SHERPA_ONNX_LOGE("Keywords file %s does not exist.", keywords_file.c_str());
SHERPA_ONNX_LOGE("Keywords file '%s' does not exist.",
keywords_file.c_str());
return false;
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/offline-ctc-fst-decoder-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void OfflineCtcFstDecoderConfig::Register(ParseOptions *po) {

bool OfflineCtcFstDecoderConfig::Validate() const {
if (!graph.empty() && !FileExists(graph)) {
SHERPA_ONNX_LOGE("graph: %s does not exist", graph.c_str());
SHERPA_ONNX_LOGE("graph: '%s' does not exist", graph.c_str());
return false;
}
return true;
Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/offline-lm-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ void OfflineLMConfig::Register(ParseOptions *po) {

bool OfflineLMConfig::Validate() const {
if (!FileExists(model)) {
SHERPA_ONNX_LOGE("%s does not exist", model.c_str());
SHERPA_ONNX_LOGE("'%s' does not exist", model.c_str());
return false;
}

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ void OfflineNemoEncDecCtcModelConfig::Register(ParseOptions *po) {

bool OfflineNemoEncDecCtcModelConfig::Validate() const {
if (!FileExists(model)) {
SHERPA_ONNX_LOGE("NeMo model: %s does not exist", model.c_str());
SHERPA_ONNX_LOGE("NeMo model: '%s' does not exist", model.c_str());
return false;
}

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/offline-paraformer-model-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ void OfflineParaformerModelConfig::Register(ParseOptions *po) {

bool OfflineParaformerModelConfig::Validate() const {
if (!FileExists(model)) {
SHERPA_ONNX_LOGE("Paraformer model %s does not exist", model.c_str());
SHERPA_ONNX_LOGE("Paraformer model '%s' does not exist", model.c_str());
return false;
}

Expand Down
6 changes: 3 additions & 3 deletions sherpa-onnx/csrc/offline-transducer-model-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@ void OfflineTransducerModelConfig::Register(ParseOptions *po) {

bool OfflineTransducerModelConfig::Validate() const {
if (!FileExists(encoder_filename)) {
SHERPA_ONNX_LOGE("transducer encoder: %s does not exist",
SHERPA_ONNX_LOGE("transducer encoder: '%s' does not exist",
encoder_filename.c_str());
return false;
}

if (!FileExists(decoder_filename)) {
SHERPA_ONNX_LOGE("transducer decoder: %s does not exist",
SHERPA_ONNX_LOGE("transducer decoder: '%s' does not exist",
decoder_filename.c_str());
return false;
}

if (!FileExists(joiner_filename)) {
SHERPA_ONNX_LOGE("transducer joiner: %s does not exist",
SHERPA_ONNX_LOGE("transducer joiner: '%s' does not exist",
joiner_filename.c_str());
return false;
}
Expand Down
Loading
Loading