Skip to content

Commit

Permalink
[voice] Add dialog group and location (#3798)
Browse files Browse the repository at this point in the history
Signed-off-by: Miguel Álvarez <[email protected]>
  • Loading branch information
GiviMAD authored Sep 13, 2023
1 parent f6435ec commit d87ef1f
Show file tree
Hide file tree
Showing 9 changed files with 231 additions and 145 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,78 +29,10 @@
* @author Miguel Álvarez - Initial contribution
*/
@NonNullByDefault
public class DialogContext {
private final @Nullable KSService ks;
private final @Nullable String keyword;
private final STTService stt;
private final TTSService tts;
private final @Nullable Voice voice;
private final List<HumanLanguageInterpreter> hlis;
private final AudioSource source;
private final AudioSink sink;
private final Locale locale;
private final @Nullable String listeningItem;
private final @Nullable String listeningMelody;

public DialogContext(@Nullable KSService ks, @Nullable String keyword, STTService stt, TTSService tts,
@Nullable Voice voice, List<HumanLanguageInterpreter> hlis, AudioSource source, AudioSink sink,
Locale locale, @Nullable String listeningItem, @Nullable String listeningMelody) {
this.ks = ks;
this.keyword = keyword;
this.stt = stt;
this.tts = tts;
this.voice = voice;
this.hlis = hlis;
this.source = source;
this.sink = sink;
this.locale = locale;
this.listeningItem = listeningItem;
this.listeningMelody = listeningMelody;
}

public @Nullable KSService ks() {
return ks;
}

public @Nullable String keyword() {
return keyword;
}

public STTService stt() {
return stt;
}

public TTSService tts() {
return tts;
}

public @Nullable Voice voice() {
return voice;
}

public List<HumanLanguageInterpreter> hlis() {
return hlis;
}

public AudioSource source() {
return source;
}

public AudioSink sink() {
return sink;
}

public Locale locale() {
return locale;
}

public @Nullable String listeningItem() {
return listeningItem;
}

public @Nullable String listeningMelody() {
return listeningMelody;
}
public record DialogContext(@Nullable KSService ks, @Nullable String keyword, STTService stt, TTSService tts,
@Nullable Voice voice, List<HumanLanguageInterpreter> hlis, AudioSource source, AudioSink sink, Locale locale,
String dialogGroup, @Nullable String locationItem, @Nullable String listeningItem,
@Nullable String listeningMelody) {

/**
* Builder for {@link DialogContext}
Expand All @@ -116,6 +48,8 @@ public static class Builder {
private @Nullable Voice voice;
private List<HumanLanguageInterpreter> hlis = List.of();
// options
private String dialogGroup = "default";
private @Nullable String locationItem;
private @Nullable String listeningItem;
private @Nullable String listeningMelody;
private String keyword;
Expand Down Expand Up @@ -189,6 +123,20 @@ public Builder withVoice(@Nullable Voice voice) {
return this;
}

/**
 * Sets the name of the dialog group this dialog belongs to.
 * A {@code null} argument is ignored, so the value already held by the builder is kept.
 *
 * @param dialogGroup the dialog group name, or {@code null} to leave the current value untouched
 * @return this builder, to allow call chaining
 */
public Builder withDialogGroup(@Nullable String dialogGroup) {
    if (dialogGroup == null) {
        // nothing provided, keep the value already configured
        return this;
    }
    this.dialogGroup = dialogGroup;
    return this;
}

/**
 * Sets the name of the location item linked to the dialog.
 * A {@code null} argument is ignored, so a previously configured value is kept.
 *
 * @param locationItem the location item name, or {@code null} to leave the current value untouched
 * @return this builder, to allow call chaining
 */
public Builder withLocationItem(@Nullable String locationItem) {
    if (locationItem == null) {
        // nothing provided, keep whatever was configured before
        return this;
    }
    this.locationItem = locationItem;
    return this;
}

public Builder withListeningItem(@Nullable String listeningItem) {
if (listeningItem != null) {
this.listeningItem = listeningItem;
Expand Down Expand Up @@ -244,7 +192,7 @@ public DialogContext build() throws IllegalStateException {
throw new IllegalStateException("Cannot build dialog context: " + String.join(", ", errors) + ".");
} else {
return new DialogContext(ksService, keyword, sttService, ttsService, voice, hliServices, audioSource,
audioSink, locale, listeningItem, listeningMelody);
audioSink, locale, dialogGroup, locationItem, listeningItem, listeningMelody);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ public class DialogRegistration {
* Linked listening item
*/
public @Nullable String listeningItem;
/**
* Linked location item
*/
public @Nullable String locationItem;
/**
* Dialog group name
*/
public @Nullable String dialogGroup;
/**
* Custom listening melody
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.WeakHashMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -69,13 +70,13 @@
* @author Miguel Álvarez - Close audio streams + use RecognitionStartEvent
* @author Miguel Álvarez - Use dialog context
* @author Miguel Álvarez - Add sounds
* @author Miguel Álvarez - Add dialog groups
*
*/
@NonNullByDefault
public class DialogProcessor implements KSListener, STTListener {

private final Logger logger = LoggerFactory.getLogger(DialogProcessor.class);

private final WeakHashMap<String, DialogContext> activeDialogGroups;
public final DialogContext dialogContext;
private @Nullable List<ToneSynthesizer.Tone> listeningMelody;
private final EventPublisher eventPublisher;
Expand Down Expand Up @@ -105,11 +106,12 @@ public class DialogProcessor implements KSListener, STTListener {
private @Nullable ToneSynthesizer toneSynthesizer;

public DialogProcessor(DialogContext context, DialogEventListener eventListener, EventPublisher eventPublisher,
TranslationProvider i18nProvider, Bundle bundle) {
WeakHashMap<String, DialogContext> activeDialogGroups, TranslationProvider i18nProvider, Bundle bundle) {
this.dialogContext = context;
this.eventListener = eventListener;
this.eventPublisher = eventPublisher;
this.i18nProvider = i18nProvider;
this.activeDialogGroups = activeDialogGroups;
this.bundle = bundle;
var ks = context.ks();
this.ksFormat = ks != null
Expand Down Expand Up @@ -182,7 +184,15 @@ public void start() throws IllegalStateException {
* Starts a single dialog
*/
public void startSimpleDialog() {
abortSTT();
synchronized (activeDialogGroups) {
if (!activeDialogGroups.containsKey(dialogContext.dialogGroup())) {
logger.debug("Acquiring dialog group '{}'", dialogContext.dialogGroup());
activeDialogGroups.put(dialogContext.dialogGroup(), dialogContext);
} else {
logger.warn("Ignoring keyword spotting event, dialog group '{}' running", dialogContext.dialogGroup());
return;
}
}
closeStreamSTT();
isSTTServerAborting = false;
AudioFormat fmt = sttFormat;
Expand All @@ -196,6 +206,7 @@ public void startSimpleDialog() {
AudioStream stream = dialogContext.source().getInputStream(fmt);
streamSTT = stream;
sttServiceHandle = dialogContext.stt().recognize(this, stream, dialogContext.locale(), new HashSet<>());
return;
} catch (AudioException e) {
logger.warn("Error creating the audio stream: {}", e.getMessage());
} catch (STTException e) {
Expand All @@ -208,6 +219,11 @@ public void startSimpleDialog() {
say(text.replace("{0}", ""));
}
}
// In case of error release dialog group
synchronized (activeDialogGroups) {
logger.debug("Releasing dialog group '{}' due to errors", dialogContext.dialogGroup());
activeDialogGroups.remove(dialogContext.dialogGroup());
}
}

/**
Expand Down Expand Up @@ -264,6 +280,10 @@ private void abortSTT() {
sttServiceHandle = null;
}
isSTTServerAborting = true;
synchronized (activeDialogGroups) {
logger.debug("Releasing dialog group '{}'", dialogContext.dialogGroup());
activeDialogGroups.remove(dialogContext.dialogGroup());
}
}

private void closeStreamSTT() {
Expand Down Expand Up @@ -292,20 +312,18 @@ private void toggleProcessing(boolean value) {

@Override
public void ksEventReceived(KSEvent ksEvent) {
if (!processing) {
isSTTServerAborting = false;
if (ksEvent instanceof KSpottedEvent) {
logger.debug("KSpottedEvent event received");
try {
startSimpleDialog();
} catch (IllegalStateException e) {
logger.warn("{}", e.getMessage());
}
} else if (ksEvent instanceof KSErrorEvent kse) {
logger.debug("KSErrorEvent event received");
String text = i18nProvider.getText(bundle, "error.ks-error", null, dialogContext.locale());
say(text == null ? kse.getMessage() : text.replace("{0}", kse.getMessage()));
isSTTServerAborting = false;
if (ksEvent instanceof KSpottedEvent) {
logger.debug("KSpottedEvent event received");
try {
startSimpleDialog();
} catch (IllegalStateException e) {
logger.warn("{}", e.getMessage());
}
} else if (ksEvent instanceof KSErrorEvent kse) {
logger.debug("KSErrorEvent event received");
String text = i18nProvider.getText(bundle, "error.ks-error", null, dialogContext.locale());
say(text == null ? kse.getMessage() : text.replace("{0}", kse.getMessage()));
}
}

Expand All @@ -322,7 +340,7 @@ public synchronized void sttEventReceived(STTEvent sttEvent) {
String error = null;
for (HumanLanguageInterpreter interpreter : dialogContext.hlis()) {
try {
answer = interpreter.interpret(dialogContext.locale(), question);
answer = interpreter.interpret(dialogContext.locale(), question, dialogContext);
logger.debug("Interpretation result: {}", answer);
error = null;
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,17 @@ public List<String> getUsages() {
buildCommandUsage(SUBCMD_DIALOG_REGS,
"lists the existing dialog registrations and their selected audio/voice services"),
buildCommandUsage(SUBCMD_REGISTER_DIALOG
+ " [--source <source>] [--sink <sink>] [--hlis <comma,separated,interpreters>] [--tts <tts> [--voice <voice>]] [--stt <stt>] [--ks ks [--keyword <ks>]] [--listening-item <listeningItem>]",
+ " [--source <source>] [--sink <sink>] [--hlis <comma,separated,interpreters>] [--tts <tts> [--voice <voice>]] [--stt <stt>] [--ks ks [--keyword <ks>]] [--listening-item <listeningItem>] [--location-item <locationItem>] [--dialog-group <dialogGroup>]",
"register a new dialog processing using the default services or the services identified with provided arguments, it will be persisted and keep running whenever is possible."),
buildCommandUsage(SUBCMD_UNREGISTER_DIALOG + " [source]",
"unregister the dialog processing for the default audio source or the audio source identified with provided argument, stopping it if started"),
buildCommandUsage(SUBCMD_START_DIALOG
+ " [--source <source>] [--sink <sink>] [--hlis <comma,separated,interpreters>] [--tts <tts> [--voice <voice>]] [--stt <stt>] [--ks ks [--keyword <ks>]] [--listening-item <listeningItem>]",
+ " [--source <source>] [--sink <sink>] [--hlis <comma,separated,interpreters>] [--tts <tts> [--voice <voice>]] [--stt <stt>] [--ks ks [--keyword <ks>]] [--listening-item <listeningItem>] [--location-item <locationItem>] [--dialog-group <dialogGroup>]",
"start a new dialog processing using the default services or the services identified with provided arguments"),
buildCommandUsage(SUBCMD_STOP_DIALOG + " [<source>]",
"stop the dialog processing for the default audio source or the audio source identified with provided argument"),
buildCommandUsage(SUBCMD_LISTEN_ANSWER
+ " [--source <source>] [--sink <sink>] [--hlis <comma,separated,interpreters>] [--tts <tts> [--voice <voice>]] [--stt <stt>] [--listening-item <listeningItem>]",
+ " [--source <source>] [--sink <sink>] [--hlis <comma,separated,interpreters>] [--tts <tts> [--voice <voice>]] [--stt <stt>] [--listening-item <listeningItem>] [--location-item <locationItem>] [--dialog-group <dialogGroup>]",
"Execute a simple dialog sequence without keyword spotting using the default services or the services identified with provided arguments"),
buildCommandUsage(SUBCMD_INTERPRETERS, "lists the interpreters"),
buildCommandUsage(SUBCMD_KEYWORD_SPOTTERS, "lists the keyword spotters"),
Expand Down Expand Up @@ -309,11 +309,12 @@ private void listDialogRegistrations(Console console) {
Collection<DialogRegistration> registrations = voiceManager.getDialogRegistrations();
if (!registrations.isEmpty()) {
registrations.stream().sorted(comparing(dr -> dr.sourceId)).forEach(dr -> {
console.println(
String.format(" Source: %s - Sink: %s (STT: %s, TTS: %s, HLIs: %s, KS: %s, Keyword: %s)",
dr.sourceId, dr.sinkId, getOrDefault(dr.sttId), getOrDefault(dr.ttsId),
dr.hliIds.isEmpty() ? getOrDefault(null) : String.join("->", dr.hliIds),
getOrDefault(dr.ksId), getOrDefault(dr.keyword)));
String locationText = dr.locationItem != null ? String.format(" Location: %s", dr.locationItem) : "";
console.println(String.format(
" Source: %s - Sink: %s (STT: %s, TTS: %s, HLIs: %s, KS: %s, Keyword: %s, Dialog Group: %s)%s",
dr.sourceId, dr.sinkId, getOrDefault(dr.sttId), getOrDefault(dr.ttsId),
dr.hliIds.isEmpty() ? getOrDefault(null) : String.join("->", dr.hliIds), getOrDefault(dr.ksId),
getOrDefault(dr.keyword), getOrDefault(dr.dialogGroup), locationText));
});
} else {
console.println("No dialog registrations.");
Expand All @@ -330,11 +331,12 @@ private void listDialogs(Console console) {
dialogContexts.stream().sorted(comparing(s -> s.source().getId())).forEach(c -> {
var ks = c.ks();
String ksText = ks != null ? String.format(", KS: %s, Keyword: %s", ks.getId(), c.keyword()) : "";
console.println(
String.format(" Source: %s - Sink: %s (STT: %s, TTS: %s, HLIs: %s%s)", c.source().getId(),
c.sink().getId(), c.stt().getId(), c.tts().getId(), c.hlis().stream()
.map(HumanLanguageInterpreter::getId).collect(Collectors.joining("->")),
ksText));
String locationText = c.locationItem() != null ? String.format(" Location: %s", c.locationItem()) : "";
console.println(String.format(
" Source: %s - Sink: %s (STT: %s, TTS: %s, HLIs: %s%s, Dialog Group: %s)%s", c.source().getId(),
c.sink().getId(), c.stt().getId(), c.tts().getId(),
c.hlis().stream().map(HumanLanguageInterpreter::getId).collect(Collectors.joining("->")),
ksText, c.dialogGroup(), locationText));
});
} else {
console.println("No running dialogs.");
Expand Down Expand Up @@ -450,6 +452,8 @@ private DialogContext.Builder parseDialogContext(String[] args) {
.withHLIs(voiceManager.getHLIsByIds(parameters.remove("hlis"))) //
.withKS(voiceManager.getKS(parameters.remove("ks"))) //
.withListeningItem(parameters.remove("listening-item")) //
.withLocationItem(parameters.remove("location-item")) //
.withDialogGroup(parameters.remove("dialog-group")) //
.withKeyword(parameters.remove("keyword"));
if (!parameters.isEmpty()) {
throw new IllegalStateException(
Expand Down Expand Up @@ -483,6 +487,9 @@ private DialogRegistration parseDialogRegistration(String[] args) {
dr.ttsId = parameters.remove("tts");
dr.voiceId = parameters.remove("voice");
dr.listeningItem = parameters.remove("listening-item");
dr.locationItem = parameters.remove("location-item");
dr.dialogGroup = parameters.remove("dialog-group");

String hliIds = parameters.remove("hlis");
if (hliIds != null) {
dr.hliIds = Arrays.stream(hliIds.split(",")).map(String::trim).collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.WeakHashMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
Expand Down Expand Up @@ -116,6 +117,8 @@ public class VoiceManagerImpl implements VoiceManager, ConfigOptionProvider, Dia
private final Map<String, TTSService> ttsServices = new HashMap<>();
private final Map<String, HumanLanguageInterpreter> humanLanguageInterpreters = new HashMap<>();

private final WeakHashMap<String, DialogContext> activeDialogGroups = new WeakHashMap<>();

private final LocaleProvider localeProvider;
private final AudioManager audioManager;
private final EventPublisher eventPublisher;
Expand Down Expand Up @@ -526,7 +529,8 @@ public void startDialog(DialogContext context) throws IllegalStateException {
if (processor == null) {
logger.debug("Starting a new dialog for source {} ({})", context.source().getLabel(null),
context.source().getId());
processor = new DialogProcessor(context, this, this.eventPublisher, this.i18nProvider, b);
processor = new DialogProcessor(context, this, this.eventPublisher, this.activeDialogGroups,
this.i18nProvider, b);
dialogProcessors.put(context.source().getId(), processor);
processor.start();
} else {
Expand Down Expand Up @@ -582,7 +586,8 @@ public void listenAndAnswer(DialogContext context) throws IllegalStateException
isSingleDialog = true;
activeProcessor = singleDialogProcessors.get(audioSource.getId());
}
var processor = new DialogProcessor(context, this, this.eventPublisher, this.i18nProvider, b);
var processor = new DialogProcessor(context, this, this.eventPublisher, this.activeDialogGroups,
this.i18nProvider, b);
if (activeProcessor == null) {
logger.debug("Executing a simple dialog for source {} ({})", audioSource.getLabel(null),
audioSource.getId());
Expand Down Expand Up @@ -970,6 +975,8 @@ private void buildDialogRegistrations() {
.withVoice(getVoice(dr.voiceId)) //
.withHLIs(getHLIsByIds(dr.hliIds)) //
.withLocale(dr.locale) //
.withDialogGroup(dr.dialogGroup) //
.withLocationItem(dr.locationItem) //
.withListeningItem(dr.listeningItem) //
.withMelody(dr.listeningMelody) //
.build());
Expand Down
Loading

0 comments on commit d87ef1f

Please sign in to comment.