Skip to content

Commit

Permalink
[voicerss] Add support for voices (#10184)
Browse files Browse the repository at this point in the history
Signed-off-by: Laurent Garnier <[email protected]>
  • Loading branch information
lolodomo authored Feb 20, 2021
1 parent fd1c966 commit 17f7041
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFor
// only a default voice
try {
File cacheAudioFile = voiceRssImpl.getTextToSpeechAsFile(apiKey, trimmedText,
voice.getLocale().toLanguageTag(), getApiAudioFormat(requestedFormat));
voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioFormat(requestedFormat));
if (cacheAudioFile == null) {
throw new TTSException("Could not read from VoiceRSS service");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.util.Locale;

import org.openhab.core.voice.Voice;
import org.openhab.voice.voicerss.internal.cloudapi.VoiceRSSCloudImpl;

/**
* Implementation of the Voice interface for VoiceRSS. Label is only "default"
Expand Down Expand Up @@ -54,7 +55,11 @@ public VoiceRSSVoice(Locale locale, String label) {
*/
@Override
public String getUID() {
    // Keep only [a-zA-Z0-9_] so the language tag (and the label below) are safe to embed in a UID.
    String uid = "voicerss:" + locale.toLanguageTag().replaceAll("[^a-zA-Z0-9_]", "");
    // The default voice keeps the plain locale-based UID; any other voice gets its sanitized
    // label appended so that several voices of the same locale have distinct UIDs.
    if (!VoiceRSSCloudImpl.DEFAULT_VOICE.equals(label)) {
        uid += "_" + label.replaceAll("[^a-zA-Z0-9_]", "");
    }
    return uid;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ public CachedVoiceRSSCloudImpl(String cacheFolderName) {
}
}

public File getTextToSpeechAsFile(String apiKey, String text, String locale, String audioFormat)
public File getTextToSpeechAsFile(String apiKey, String text, String locale, String voice, String audioFormat)
throws IOException {
String fileNameInCache = getUniqueFilenameForText(text, locale);
String fileNameInCache = getUniqueFilenameForText(text, locale, voice);
// check if in cache
File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + audioFormat.toLowerCase());
if (audioFileInCache.exists()) {
return audioFileInCache;
}

// if not in cache, get audio data and put to cache
try (InputStream is = super.getTextToSpeech(apiKey, text, locale, audioFormat);
try (InputStream is = super.getTextToSpeech(apiKey, text, locale, voice, audioFormat);
FileOutputStream fos = new FileOutputStream(audioFileInCache)) {
copyStream(is, fos);
// write text to file for transparency too
Expand All @@ -89,7 +89,7 @@ public File getTextToSpeechAsFile(String apiKey, String text, String locale, Str
*
* Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
*/
private String getUniqueFilenameForText(String text, String locale) {
private String getUniqueFilenameForText(String text, String locale, String voice) {
try {
byte[] bytesOfMessage = text.getBytes(StandardCharsets.UTF_8);
MessageDigest md = MessageDigest.getInstance("MD5");
Expand All @@ -101,7 +101,12 @@ private String getUniqueFilenameForText(String text, String locale) {
while (hashtext.length() < 32) {
hashtext = "0" + hashtext;
}
return locale + "_" + hashtext;
String filename = locale + "_";
if (!DEFAULT_VOICE.equals(voice)) {
filename += voice + "_";
}
filename += hashtext;
return filename;
} catch (NoSuchAlgorithmException ex) {
// should not happen
logger.error("Could not create MD5 hash for '{}'", text, ex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,15 @@ public interface VoiceRSSCloudAPI {
* the text to translate into speech
* @param locale
* the locale to use
* @param voice
* the voice to use, "default" for the default voice
* @param audioFormat
* the audio format to use
* @return an InputStream to the audio data in specified format
* @throws IOException
* will be raised if the audio data can not be retrieved from
* cloud service
*/
InputStream getTextToSpeech(String apiKey, String text, String locale, String audioFormat) throws IOException;
InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Stream;
Expand All @@ -34,7 +35,7 @@

/**
* This class implements the Cloud service from VoiceRSS. For more information,
* see API documentation at http://www.voicerss.org/api/documentation.aspx.
* see API documentation at http://www.voicerss.org/api .
*
* Current state of implementation:
* <ul>
Expand All @@ -50,6 +51,8 @@
*/
public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {

public static final String DEFAULT_VOICE = "default";

private final Logger logger = LoggerFactory.getLogger(VoiceRSSCloudImpl.class);

private static final Set<String> SUPPORTED_AUDIO_FORMATS = Stream.of("MP3", "OGG", "AAC").collect(toSet());
Expand All @@ -63,8 +66,8 @@ public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
SUPPORTED_LOCALES.add(Locale.forLanguageTag("cs-cz"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("da-dk"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-at"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-ch"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-de"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-ch"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("el-gr"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-au"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-ca"));
Expand All @@ -76,8 +79,8 @@ public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
SUPPORTED_LOCALES.add(Locale.forLanguageTag("es-mx"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fi-fi"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ca"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ch"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-fr"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ch"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("he-il"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("hi-in"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("hr-hr"));
Expand Down Expand Up @@ -107,7 +110,58 @@ public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-tw"));
}

private static final Set<String> SUPPORTED_VOICES = Collections.singleton("VoiceRSS");
/**
 * Named voices available per locale, keyed by the locale's language tag in lowercase
 * (for example "en-us"). Lookups lowercase the requested tag before querying this map,
 * so every key added here must be lowercase as well.
 *
 * The default voice is not listed: the getAvailableVoices methods add DEFAULT_VOICE to
 * their result themselves.
 *
 * NOTE(review): presumably mirrors the voice list published in the VoiceRSS API
 * documentation - verify against it when adding or removing entries.
 */
private static final Map<String, Set<String>> SUPPORTED_VOICES = new HashMap<>();
static {
SUPPORTED_VOICES.put("ar-eg", Set.of("Oda"));
SUPPORTED_VOICES.put("ar-sa", Set.of("Salim"));
SUPPORTED_VOICES.put("bg-bg", Set.of("Dimo"));
SUPPORTED_VOICES.put("ca-es", Set.of("Rut"));
SUPPORTED_VOICES.put("cs-cz", Set.of("Josef"));
SUPPORTED_VOICES.put("da-dk", Set.of("Freja"));
SUPPORTED_VOICES.put("de-at", Set.of("Lukas"));
SUPPORTED_VOICES.put("de-de", Set.of("Hanna", "Lina", "Jonas"));
SUPPORTED_VOICES.put("de-ch", Set.of("Tim"));
SUPPORTED_VOICES.put("el-gr", Set.of("Neo"));
SUPPORTED_VOICES.put("en-au", Set.of("Zoe", "Isla", "Evie", "Jack"));
SUPPORTED_VOICES.put("en-ca", Set.of("Rose", "Clara", "Emma", "Mason"));
SUPPORTED_VOICES.put("en-gb", Set.of("Alice", "Nancy", "Lily", "Harry"));
SUPPORTED_VOICES.put("en-ie", Set.of("Oran"));
SUPPORTED_VOICES.put("en-in", Set.of("Eka", "Jai", "Ajit"));
SUPPORTED_VOICES.put("en-us", Set.of("Linda", "Amy", "Mary", "John", "Mike"));
SUPPORTED_VOICES.put("es-es", Set.of("Camila", "Sofia", "Luna", "Diego"));
SUPPORTED_VOICES.put("es-mx", Set.of("Juana", "Silvia", "Teresa", "Jose"));
SUPPORTED_VOICES.put("fi-fi", Set.of("Aada"));
SUPPORTED_VOICES.put("fr-ca", Set.of("Emile", "Olivia", "Logan", "Felix"));
SUPPORTED_VOICES.put("fr-fr", Set.of("Bette", "Iva", "Zola", "Axel"));
SUPPORTED_VOICES.put("fr-ch", Set.of("Theo"));
SUPPORTED_VOICES.put("he-il", Set.of("Rami"));
SUPPORTED_VOICES.put("hi-in", Set.of("Puja", "Kabir"));
SUPPORTED_VOICES.put("hr-hr", Set.of("Nikola"));
SUPPORTED_VOICES.put("hu-hu", Set.of("Mate"));
SUPPORTED_VOICES.put("id-id", Set.of("Intan"));
SUPPORTED_VOICES.put("it-it", Set.of("Bria", "Mia", "Pietro"));
SUPPORTED_VOICES.put("ja-jp", Set.of("Hina", "Airi", "Fumi", "Akira"));
SUPPORTED_VOICES.put("ko-kr", Set.of("Nari"));
SUPPORTED_VOICES.put("ms-my", Set.of("Aqil"));
SUPPORTED_VOICES.put("nb-no", Set.of("Marte", "Erik"));
SUPPORTED_VOICES.put("nl-be", Set.of("Daan"));
SUPPORTED_VOICES.put("nl-nl", Set.of("Lotte", "Bram"));
SUPPORTED_VOICES.put("pl-pl", Set.of("Julia", "Jan"));
SUPPORTED_VOICES.put("pt-br", Set.of("Marcia", "Ligia", "Yara", "Dinis"));
SUPPORTED_VOICES.put("pt-pt", Set.of("Leonor"));
SUPPORTED_VOICES.put("ro-ro", Set.of("Doru"));
SUPPORTED_VOICES.put("ru-ru", Set.of("Olga", "Marina", "Peter"));
SUPPORTED_VOICES.put("sk-sk", Set.of("Beda"));
SUPPORTED_VOICES.put("sl-si", Set.of("Vid"));
SUPPORTED_VOICES.put("sv-se", Set.of("Molly", "Hugo"));
SUPPORTED_VOICES.put("ta-in", Set.of("Sai"));
SUPPORTED_VOICES.put("th-th", Set.of("Ukrit"));
SUPPORTED_VOICES.put("tr-tr", Set.of("Omer"));
SUPPORTED_VOICES.put("vi-vn", Set.of("Chi"));
SUPPORTED_VOICES.put("zh-cn", Set.of("Luli", "Shu", "Chow", "Wang"));
SUPPORTED_VOICES.put("zh-hk", Set.of("Jia", "Xia", "Chen"));
SUPPORTED_VOICES.put("zh-tw", Set.of("Akemi", "Lin", "Lee"));
}

@Override
public Set<String> getAvailableAudioFormats() {
Expand All @@ -121,17 +175,29 @@ public Set<Locale> getAvailableLocales() {

@Override
public Set<String> getAvailableVoices() {
    // Different locales support different voices, so when no locale is given the result is
    // the union of the voices of every locale, plus the default voice.
    Set<String> allVoices = new HashSet<>();
    allVoices.add(DEFAULT_VOICE);
    for (Set<String> localeVoices : SUPPORTED_VOICES.values()) {
        allVoices.addAll(localeVoices);
    }
    // A fresh set is returned on every call, so callers cannot modify the static table.
    return allVoices;
}

@Override
public Set<String> getAvailableVoices(Locale locale) {
for (Locale voiceLocale : SUPPORTED_LOCALES) {
if (voiceLocale.toLanguageTag().equalsIgnoreCase(locale.toLanguageTag())) {
return SUPPORTED_VOICES;
Set<String> allvoxes = new HashSet<>();
allvoxes.add(DEFAULT_VOICE);
// all maps must be defined with key in lowercase
String langtag = locale.toLanguageTag().toLowerCase();
if (SUPPORTED_VOICES.containsKey(langtag)) {
for (String langvox : SUPPORTED_VOICES.get(langtag)) {
allvoxes.add(langvox);
}
}
return new HashSet<>();
return allvoxes;
}

/**
Expand All @@ -142,9 +208,9 @@ public Set<String> getAvailableVoices(Locale locale) {
* dependencies.
*/
@Override
public InputStream getTextToSpeech(String apiKey, String text, String locale, String audioFormat)
public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
throws IOException {
String url = createURL(apiKey, text, locale, audioFormat);
String url = createURL(apiKey, text, locale, voice, audioFormat);
logger.debug("Call {}", url);
URLConnection connection = new URL(url).openConnection();

Expand Down Expand Up @@ -188,7 +254,7 @@ public InputStream getTextToSpeech(String apiKey, String text, String locale, St
*
* The method is declared private, so it is not directly accessible from tests.
*/
private String createURL(String apiKey, String text, String locale, String audioFormat) {
private String createURL(String apiKey, String text, String locale, String voice, String audioFormat) {
String encodedMsg;
try {
encodedMsg = URLEncoder.encode(text, "UTF-8");
Expand All @@ -197,7 +263,11 @@ private String createURL(String apiKey, String text, String locale, String audio
// fall through and use msg un-encoded
encodedMsg = text;
}
return "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioFormat
+ "&f=44khz_16bit_mono&src=" + encodedMsg;
String url = "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioFormat;
if (!DEFAULT_VOICE.equals(voice)) {
url += "&v=" + voice;
}
url += "&f=44khz_16bit_mono&src=" + encodedMsg;
return url;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,19 @@ public int doMain(String[] args) throws IOException {
String apiKey = args[1];
String cacheDir = args[2];
String locale = args[3];
if (args[4].startsWith("@")) {
String inputFileName = args[4].substring(1);
String voice = args[4];
if (args[5].startsWith("@")) {
String inputFileName = args[5].substring(1);
File inputFile = new File(inputFileName);
if (!inputFile.exists()) {
usage();
System.err.println("File " + inputFileName + " not found");
return RC_INPUT_FILE_NOT_FOUND;
}
generateCacheForFile(apiKey, cacheDir, locale, inputFileName);
generateCacheForFile(apiKey, cacheDir, locale, voice, inputFileName);
} else {
String text = args[4];
generateCacheForMessage(apiKey, cacheDir, locale, text);
String text = args[5];
generateCacheForMessage(apiKey, cacheDir, locale, voice, text);
}
return RC_OK;
}
Expand All @@ -71,6 +72,7 @@ private void usage() {
System.out.println(" key the VoiceRSS API Key, e.g. \"123456789\"");
System.out.println(" cache-dir is directory where the files will be stored, e.g. \"voicerss-cache\"");
System.out.println(" locale the language locale, has to be valid, e.g. \"en-us\", \"de-de\"");
System.out.println(" voice the voice, \"default\" for the default voice");
System.out.println(" text the text to create audio file for, e.g. \"Hello World\"");
System.out.println(
" inputfile a name of a file, where all lines will be translatet to text, e.g. \"@message.txt\"");
Expand All @@ -80,19 +82,20 @@ private void usage() {
System.out.println();
}

private void generateCacheForFile(String apiKey, String cacheDir, String locale, String inputFileName)
private void generateCacheForFile(String apiKey, String cacheDir, String locale, String voice, String inputFileName)
throws IOException {
File inputFile = new File(inputFileName);
try (BufferedReader br = new BufferedReader(new FileReader(inputFile))) {
String line;
while ((line = br.readLine()) != null) {
// process the line.
generateCacheForMessage(apiKey, cacheDir, locale, line);
generateCacheForMessage(apiKey, cacheDir, locale, voice, line);
}
}
}

private void generateCacheForMessage(String apiKey, String cacheDir, String locale, String msg) throws IOException {
private void generateCacheForMessage(String apiKey, String cacheDir, String locale, String voice, String msg)
throws IOException {
if (msg == null) {
System.err.println("Ignore msg=null");
return;
Expand All @@ -103,7 +106,7 @@ private void generateCacheForMessage(String apiKey, String cacheDir, String loca
return;
}
CachedVoiceRSSCloudImpl impl = new CachedVoiceRSSCloudImpl(cacheDir);
File cachedFile = impl.getTextToSpeechAsFile(apiKey, trimmedMsg, locale, "MP3");
File cachedFile = impl.getTextToSpeechAsFile(apiKey, trimmedMsg, locale, voice, "MP3");
System.out.println(
"Created cached audio for locale='" + locale + "', msg='" + trimmedMsg + "' to file=" + cachedFile);
}
Expand Down

0 comments on commit 17f7041

Please sign in to comment.