Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[voicerss] Add support for voices #10184

Merged
merged 2 commits into from
Feb 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFor
// only a default voice
try {
File cacheAudioFile = voiceRssImpl.getTextToSpeechAsFile(apiKey, trimmedText,
voice.getLocale().toLanguageTag(), getApiAudioFormat(requestedFormat));
voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioFormat(requestedFormat));
if (cacheAudioFile == null) {
throw new TTSException("Could not read from VoiceRSS service");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.util.Locale;

import org.openhab.core.voice.Voice;
import org.openhab.voice.voicerss.internal.cloudapi.VoiceRSSCloudImpl;

/**
* Implementation of the Voice interface for VoiceRSS. Label is only "default"
Expand Down Expand Up @@ -54,7 +55,11 @@ public VoiceRSSVoice(Locale locale, String label) {
*/
@Override
public String getUID() {
    // Only letters, digits and underscores are kept in each part of the UID.
    final String disallowedChars = "[^a-zA-Z0-9_]";
    StringBuilder uid = new StringBuilder("voicerss:");
    uid.append(locale.toLanguageTag().replaceAll(disallowedChars, ""));
    // The default voice keeps the plain locale-based UID; any other voice
    // gets its sanitized label appended after an underscore.
    if (!label.equals(VoiceRSSCloudImpl.DEFAULT_VOICE)) {
        uid.append('_').append(label.replaceAll(disallowedChars, ""));
    }
    return uid.toString();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ public CachedVoiceRSSCloudImpl(String cacheFolderName) {
}
}

public File getTextToSpeechAsFile(String apiKey, String text, String locale, String audioFormat)
public File getTextToSpeechAsFile(String apiKey, String text, String locale, String voice, String audioFormat)
throws IOException {
String fileNameInCache = getUniqueFilenameForText(text, locale);
String fileNameInCache = getUniqueFilenameForText(text, locale, voice);
// check if in cache
File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + audioFormat.toLowerCase());
if (audioFileInCache.exists()) {
return audioFileInCache;
}

// if not in cache, get audio data and put to cache
try (InputStream is = super.getTextToSpeech(apiKey, text, locale, audioFormat);
try (InputStream is = super.getTextToSpeech(apiKey, text, locale, voice, audioFormat);
FileOutputStream fos = new FileOutputStream(audioFileInCache)) {
copyStream(is, fos);
// write text to file for transparency too
Expand All @@ -89,7 +89,7 @@ public File getTextToSpeechAsFile(String apiKey, String text, String locale, Str
*
* Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
*/
private String getUniqueFilenameForText(String text, String locale) {
private String getUniqueFilenameForText(String text, String locale, String voice) {
try {
byte[] bytesOfMessage = text.getBytes(StandardCharsets.UTF_8);
MessageDigest md = MessageDigest.getInstance("MD5");
Expand All @@ -101,7 +101,12 @@ private String getUniqueFilenameForText(String text, String locale) {
while (hashtext.length() < 32) {
hashtext = "0" + hashtext;
}
return locale + "_" + hashtext;
String filename = locale + "_";
if (!DEFAULT_VOICE.equals(voice)) {
filename += voice + "_";
}
filename += hashtext;
return filename;
} catch (NoSuchAlgorithmException ex) {
// should not happen
logger.error("Could not create MD5 hash for '{}'", text, ex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,15 @@ public interface VoiceRSSCloudAPI {
* the text to translate into speech
* @param locale
* the locale to use
* @param voice
* the voice to use, "default" for the default voice
* @param audioFormat
* the audio format to use
* @return an InputStream to the audio data in specified format
* @throws IOException
* will be raised if the audio data can not be retrieved from
* cloud service
*/
InputStream getTextToSpeech(String apiKey, String text, String locale, String audioFormat) throws IOException;
InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Stream;
Expand All @@ -34,7 +35,7 @@

/**
* This class implements the Cloud service from VoiceRSS. For more information,
* see API documentation at http://www.voicerss.org/api/documentation.aspx.
* see API documentation at http://www.voicerss.org/api .
*
* Current state of implementation:
* <ul>
Expand All @@ -50,6 +51,8 @@
*/
public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {

public static final String DEFAULT_VOICE = "default";

private final Logger logger = LoggerFactory.getLogger(VoiceRSSCloudImpl.class);

private static final Set<String> SUPPORTED_AUDIO_FORMATS = Stream.of("MP3", "OGG", "AAC").collect(toSet());
Expand All @@ -63,8 +66,8 @@ public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
SUPPORTED_LOCALES.add(Locale.forLanguageTag("cs-cz"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("da-dk"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-at"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-ch"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-de"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("de-ch"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("el-gr"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-au"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("en-ca"));
Expand All @@ -76,8 +79,8 @@ public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
SUPPORTED_LOCALES.add(Locale.forLanguageTag("es-mx"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fi-fi"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ca"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ch"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-fr"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("fr-ch"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("he-il"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("hi-in"));
SUPPORTED_LOCALES.add(Locale.forLanguageTag("hr-hr"));
Expand Down Expand Up @@ -107,7 +110,58 @@ public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
SUPPORTED_LOCALES.add(Locale.forLanguageTag("zh-tw"));
}

private static final Set<String> SUPPORTED_VOICES = Collections.singleton("VoiceRSS");
/**
 * Named voices offered for each locale, keyed by the lower-case language tag
 * (for example "en-us"). The "default" voice is not stored in this table; the
 * getAvailableVoices methods add it to their results separately.
 */
private static final Map<String, Set<String>> SUPPORTED_VOICES = new HashMap<>();
// Filled once during class initialization; keys must stay lower-case because
// lookups lower-case the requested language tag before querying this map.
static {
SUPPORTED_VOICES.put("ar-eg", Set.of("Oda"));
SUPPORTED_VOICES.put("ar-sa", Set.of("Salim"));
SUPPORTED_VOICES.put("bg-bg", Set.of("Dimo"));
SUPPORTED_VOICES.put("ca-es", Set.of("Rut"));
SUPPORTED_VOICES.put("cs-cz", Set.of("Josef"));
SUPPORTED_VOICES.put("da-dk", Set.of("Freja"));
SUPPORTED_VOICES.put("de-at", Set.of("Lukas"));
SUPPORTED_VOICES.put("de-de", Set.of("Hanna", "Lina", "Jonas"));
SUPPORTED_VOICES.put("de-ch", Set.of("Tim"));
SUPPORTED_VOICES.put("el-gr", Set.of("Neo"));
SUPPORTED_VOICES.put("en-au", Set.of("Zoe", "Isla", "Evie", "Jack"));
SUPPORTED_VOICES.put("en-ca", Set.of("Rose", "Clara", "Emma", "Mason"));
SUPPORTED_VOICES.put("en-gb", Set.of("Alice", "Nancy", "Lily", "Harry"));
SUPPORTED_VOICES.put("en-ie", Set.of("Oran"));
SUPPORTED_VOICES.put("en-in", Set.of("Eka", "Jai", "Ajit"));
SUPPORTED_VOICES.put("en-us", Set.of("Linda", "Amy", "Mary", "John", "Mike"));
SUPPORTED_VOICES.put("es-es", Set.of("Camila", "Sofia", "Luna", "Diego"));
SUPPORTED_VOICES.put("es-mx", Set.of("Juana", "Silvia", "Teresa", "Jose"));
SUPPORTED_VOICES.put("fi-fi", Set.of("Aada"));
SUPPORTED_VOICES.put("fr-ca", Set.of("Emile", "Olivia", "Logan", "Felix"));
SUPPORTED_VOICES.put("fr-fr", Set.of("Bette", "Iva", "Zola", "Axel"));
SUPPORTED_VOICES.put("fr-ch", Set.of("Theo"));
SUPPORTED_VOICES.put("he-il", Set.of("Rami"));
SUPPORTED_VOICES.put("hi-in", Set.of("Puja", "Kabir"));
SUPPORTED_VOICES.put("hr-hr", Set.of("Nikola"));
SUPPORTED_VOICES.put("hu-hu", Set.of("Mate"));
SUPPORTED_VOICES.put("id-id", Set.of("Intan"));
SUPPORTED_VOICES.put("it-it", Set.of("Bria", "Mia", "Pietro"));
SUPPORTED_VOICES.put("ja-jp", Set.of("Hina", "Airi", "Fumi", "Akira"));
SUPPORTED_VOICES.put("ko-kr", Set.of("Nari"));
SUPPORTED_VOICES.put("ms-my", Set.of("Aqil"));
SUPPORTED_VOICES.put("nb-no", Set.of("Marte", "Erik"));
SUPPORTED_VOICES.put("nl-be", Set.of("Daan"));
SUPPORTED_VOICES.put("nl-nl", Set.of("Lotte", "Bram"));
SUPPORTED_VOICES.put("pl-pl", Set.of("Julia", "Jan"));
SUPPORTED_VOICES.put("pt-br", Set.of("Marcia", "Ligia", "Yara", "Dinis"));
SUPPORTED_VOICES.put("pt-pt", Set.of("Leonor"));
SUPPORTED_VOICES.put("ro-ro", Set.of("Doru"));
SUPPORTED_VOICES.put("ru-ru", Set.of("Olga", "Marina", "Peter"));
SUPPORTED_VOICES.put("sk-sk", Set.of("Beda"));
SUPPORTED_VOICES.put("sl-si", Set.of("Vid"));
SUPPORTED_VOICES.put("sv-se", Set.of("Molly", "Hugo"));
SUPPORTED_VOICES.put("ta-in", Set.of("Sai"));
SUPPORTED_VOICES.put("th-th", Set.of("Ukrit"));
SUPPORTED_VOICES.put("tr-tr", Set.of("Omer"));
SUPPORTED_VOICES.put("vi-vn", Set.of("Chi"));
SUPPORTED_VOICES.put("zh-cn", Set.of("Luli", "Shu", "Chow", "Wang"));
SUPPORTED_VOICES.put("zh-hk", Set.of("Jia", "Xia", "Chen"));
SUPPORTED_VOICES.put("zh-tw", Set.of("Akemi", "Lin", "Lee"));
}

@Override
public Set<String> getAvailableAudioFormats() {
Expand All @@ -121,17 +175,29 @@ public Set<Locale> getAvailableLocales() {

/**
 * Returns every voice name known for any locale, plus the "default" voice.
 *
 * @return a new mutable set holding the union of all per-locale voices and the default voice
 */
@Override
public Set<String> getAvailableVoices() {
    // Voices differ per locale; without a locale the best answer is the union of all of them.
    Set<String> voices = new HashSet<>();
    voices.add(DEFAULT_VOICE);
    for (Set<String> localeVoices : SUPPORTED_VOICES.values()) {
        voices.addAll(localeVoices);
    }
    return voices;
}

@Override
public Set<String> getAvailableVoices(Locale locale) {
for (Locale voiceLocale : SUPPORTED_LOCALES) {
if (voiceLocale.toLanguageTag().equalsIgnoreCase(locale.toLanguageTag())) {
return SUPPORTED_VOICES;
Set<String> allvoxes = new HashSet<>();
allvoxes.add(DEFAULT_VOICE);
// all maps must be defined with key in lowercase
String langtag = locale.toLanguageTag().toLowerCase();
if (SUPPORTED_VOICES.containsKey(langtag)) {
for (String langvox : SUPPORTED_VOICES.get(langtag)) {
allvoxes.add(langvox);
}
}
return new HashSet<>();
return allvoxes;
}

/**
Expand All @@ -142,9 +208,9 @@ public Set<String> getAvailableVoices(Locale locale) {
* dependencies.
*/
@Override
public InputStream getTextToSpeech(String apiKey, String text, String locale, String audioFormat)
public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
throws IOException {
String url = createURL(apiKey, text, locale, audioFormat);
String url = createURL(apiKey, text, locale, voice, audioFormat);
logger.debug("Call {}", url);
URLConnection connection = new URL(url).openConnection();

Expand Down Expand Up @@ -188,7 +254,7 @@ public InputStream getTextToSpeech(String apiKey, String text, String locale, St
*
* It is in package scope to be accessed by tests.
*/
private String createURL(String apiKey, String text, String locale, String audioFormat) {
private String createURL(String apiKey, String text, String locale, String voice, String audioFormat) {
String encodedMsg;
try {
encodedMsg = URLEncoder.encode(text, "UTF-8");
Expand All @@ -197,7 +263,11 @@ private String createURL(String apiKey, String text, String locale, String audio
// fall through and use msg un-encoded
encodedMsg = text;
}
return "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioFormat
+ "&f=44khz_16bit_mono&src=" + encodedMsg;
String url = "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioFormat;
if (!DEFAULT_VOICE.equals(voice)) {
url += "&v=" + voice;
}
url += "&f=44khz_16bit_mono&src=" + encodedMsg;
return url;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,19 @@ public int doMain(String[] args) throws IOException {
String apiKey = args[1];
String cacheDir = args[2];
String locale = args[3];
if (args[4].startsWith("@")) {
String inputFileName = args[4].substring(1);
String voice = args[4];
if (args[5].startsWith("@")) {
String inputFileName = args[5].substring(1);
File inputFile = new File(inputFileName);
if (!inputFile.exists()) {
usage();
System.err.println("File " + inputFileName + " not found");
return RC_INPUT_FILE_NOT_FOUND;
}
generateCacheForFile(apiKey, cacheDir, locale, inputFileName);
generateCacheForFile(apiKey, cacheDir, locale, voice, inputFileName);
} else {
String text = args[4];
generateCacheForMessage(apiKey, cacheDir, locale, text);
String text = args[5];
generateCacheForMessage(apiKey, cacheDir, locale, voice, text);
}
return RC_OK;
}
Expand All @@ -71,6 +72,7 @@ private void usage() {
System.out.println(" key the VoiceRSS API Key, e.g. \"123456789\"");
System.out.println(" cache-dir is directory where the files will be stored, e.g. \"voicerss-cache\"");
System.out.println(" locale the language locale, has to be valid, e.g. \"en-us\", \"de-de\"");
System.out.println(" voice the voice, \"default\" for the default voice");
System.out.println(" text the text to create audio file for, e.g. \"Hello World\"");
System.out.println(
" inputfile a name of a file, where all lines will be translatet to text, e.g. \"@message.txt\"");
Expand All @@ -80,19 +82,20 @@ private void usage() {
System.out.println();
}

private void generateCacheForFile(String apiKey, String cacheDir, String locale, String inputFileName)
private void generateCacheForFile(String apiKey, String cacheDir, String locale, String voice, String inputFileName)
throws IOException {
File inputFile = new File(inputFileName);
try (BufferedReader br = new BufferedReader(new FileReader(inputFile))) {
String line;
while ((line = br.readLine()) != null) {
// process the line.
generateCacheForMessage(apiKey, cacheDir, locale, line);
generateCacheForMessage(apiKey, cacheDir, locale, voice, line);
}
}
}

private void generateCacheForMessage(String apiKey, String cacheDir, String locale, String msg) throws IOException {
private void generateCacheForMessage(String apiKey, String cacheDir, String locale, String voice, String msg)
throws IOException {
if (msg == null) {
System.err.println("Ignore msg=null");
return;
Expand All @@ -103,7 +106,7 @@ private void generateCacheForMessage(String apiKey, String cacheDir, String loca
return;
}
CachedVoiceRSSCloudImpl impl = new CachedVoiceRSSCloudImpl(cacheDir);
File cachedFile = impl.getTextToSpeechAsFile(apiKey, trimmedMsg, locale, "MP3");
File cachedFile = impl.getTextToSpeechAsFile(apiKey, trimmedMsg, locale, voice, "MP3");
System.out.println(
"Created cached audio for locale='" + locale + "', msg='" + trimmedMsg + "' to file=" + cachedFile);
}
Expand Down