Skip to content

Commit

Permalink
Add sentence splitting tests
Browse files: browse the repository at this point in the history
  • Loading branch information
synesthesiam committed Jul 27, 2023
1 parent faf86b9 commit e6b0e55
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
11 changes: 10 additions & 1 deletion src/python_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
de_phonemes = phonemize_espeak("licht!", "de")

# "lˈɪçt!" where "ç" is decomposed into two codepoints
assert de_phonemes == [["l", "ˈ", "ɪ", "c", "̧", "t", "!"]]
assert de_phonemes == [["l", "ˈ", "ɪ", "c", "̧", "t", "!"]], de_phonemes

# phoneme -> [id, ...]
espeak_map = get_espeak_map()
Expand All @@ -44,6 +44,15 @@

# -----------------------------------------------------------------------------

# Both sentences start with a capital letter: espeak requires the
# capitalization in order to split the text into separate sentences.
en_phonemes = phonemize_espeak("Test 1. Test2.", "en-us")
assert en_phonemes == [
["t", "ˈ", "ɛ", "s", "t", " ", "w", "ˈ", "ʌ", "n", "."],
["t", "ˈ", "ɛ", "s", "t", " ", "t", "ˈ", "u", "ː", "."],
], en_phonemes

# -----------------------------------------------------------------------------

codepoints_map = get_codepoints_map()
assert "uk" in codepoints_map, "uk not supported"
uk_phonemes = phonemize_codepoints("ВЕСЕ́ЛКА")
Expand Down
16 changes: 15 additions & 1 deletion src/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ phonemeString(const std::vector<std::vector<piper::Phoneme>> &phonemes) {

phonemeStr << una::utf32to8(phonemeU32Str);
}

phonemeStr << "\n";
}

return phonemeStr.str();
Expand Down Expand Up @@ -82,11 +84,23 @@ int main(int argc, char *argv[]) {
piper::phonemize_eSpeak("this, is: a; test.", phonemeConfig, phonemes);

std::string phonemeStr = phonemeString(phonemes);
if (phonemeStr != "ðˈɪs, ɪz: ˈeɪ; tˈɛst.") {
if (phonemeStr != "ðˈɪs, ɪz: ˈeɪ; tˈɛst.\n") {
std::cerr << "punctuation test: " << phonemeStr << std::endl;
return 1;
}

// Check sentence splitting.
phonemes.clear();

// Both sentences start with a capital letter: espeak requires the
// capitalization in order to split the text into separate sentences.
piper::phonemize_eSpeak("Test 1. Test 2.", phonemeConfig, phonemes);

phonemeStr = phonemeString(phonemes);
if (phonemeStr != "tˈɛst wˈʌn.\ntˈɛst tˈuː.\n") {
std::cerr << "sentence split: " << phonemeStr << std::endl;
return 1;
}

// Check "ВЕСЕ́ЛКА" in Ukrainian
piper::CodepointsPhonemeConfig codepointsConfig;
phonemes.clear();
Expand Down

0 comments on commit e6b0e55

Please sign in to comment.