Skip to content

Commit

Permalink
[voice] Allow list of HLIs in dialog function (openhab#2906)
Browse files (browse the repository at this point in the history)
* [Voice] Allow hli list

Signed-off-by: Miguel Álvarez Díez <miguelwork92@gmail.com>
GitOrigin-RevId: 5753627
  • Loading branch information
GiviMAD authored and splatch committed Jul 12, 2023
1 parent dfabb9c commit 5516e59
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 114 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,6 +14,7 @@

import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;

import javax.annotation.security.RolesAllowed;
Expand Down Expand Up @@ -55,6 +56,8 @@
import org.osgi.service.jaxrs.whiteboard.propertytypes.JaxrsApplicationSelect;
import org.osgi.service.jaxrs.whiteboard.propertytypes.JaxrsName;
import org.osgi.service.jaxrs.whiteboard.propertytypes.JaxrsResource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
Expand Down Expand Up @@ -86,6 +89,7 @@ public class VoiceResource implements RESTResource {
/** The URI path to this resource */
public static final String PATH_VOICE = "voice";

private final Logger logger = LoggerFactory.getLogger(VoiceResource.class);
private final LocaleService localeService;
private final AudioManager audioManager;
private final VoiceManager voiceManager;
Expand Down Expand Up @@ -133,27 +137,38 @@ public Response getInterpreter(
}

@POST
@Path("/interpreters/{id: [a-zA-Z_0-9]+}")
@Path("/interpreters/{ids: [a-zA-Z_0-9,]+}")
@Consumes(MediaType.TEXT_PLAIN)
@Operation(operationId = "interpretText", summary = "Sends a text to a given human language interpreter.", responses = {
@Operation(operationId = "interpretText", summary = "Sends a text to a given human language interpreter(s).", responses = {
@ApiResponse(responseCode = "200", description = "OK"),
@ApiResponse(responseCode = "404", description = "No human language interpreter was found."),
@ApiResponse(responseCode = "400", description = "interpretation exception occurs") })
public Response interpret(
@HeaderParam(HttpHeaders.ACCEPT_LANGUAGE) @Parameter(description = "language") @Nullable String language,
@Parameter(description = "text to interpret", required = true) String text,
@PathParam("id") @Parameter(description = "interpreter id") String id) {
@PathParam("ids") @Parameter(description = "comma separated list of interpreter ids") List<String> ids) {
final Locale locale = localeService.getLocale(language);
HumanLanguageInterpreter hli = voiceManager.getHLI(id);
if (hli == null) {
List<HumanLanguageInterpreter> hlis = voiceManager.getHLIsByIds(ids);
if (hlis.isEmpty()) {
return JSONResponse.createErrorResponse(Status.NOT_FOUND, "No interpreter found");
}

try {
hli.interpret(locale, text);
return Response.ok(null, MediaType.TEXT_PLAIN).build();
} catch (InterpretationException e) {
return JSONResponse.createErrorResponse(Status.BAD_REQUEST, e.getMessage());
String answer = "";
String error = null;
for (HumanLanguageInterpreter interpreter : hlis) {
try {
answer = interpreter.interpret(locale, text);
logger.debug("Interpretation result: {}", answer);
error = null;
break;
} catch (InterpretationException e) {
logger.debug("Interpretation exception: {}", e.getMessage());
error = Objects.requireNonNullElse(e.getMessage(), "Unexpected error");
}
}
if (error != null) {
return JSONResponse.createErrorResponse(Status.BAD_REQUEST, error);
} else {
return Response.ok(answer, MediaType.TEXT_PLAIN).build();
}
}

Expand Down Expand Up @@ -232,7 +247,7 @@ public Response startDialog(
@QueryParam("ksId") @Parameter(description = "keywork spotter ID") @Nullable String ksId,
@QueryParam("sttId") @Parameter(description = "Speech-to-Text ID") @Nullable String sttId,
@QueryParam("ttsId") @Parameter(description = "Text-to-Speech ID") @Nullable String ttsId,
@QueryParam("hliId") @Parameter(description = "interpreter ID") @Nullable String hliId,
@QueryParam("hliIds") @Parameter(description = "comma separated list of interpreter IDs") @Nullable String hliIds,
@QueryParam("sinkId") @Parameter(description = "audio sink ID") @Nullable String sinkId,
@QueryParam("keyword") @Parameter(description = "keyword") @Nullable String keyword,
@QueryParam("listeningItem") @Parameter(description = "listening item") @Nullable String listeningItem) {
Expand Down Expand Up @@ -264,10 +279,10 @@ public Response startDialog(
return JSONResponse.createErrorResponse(Status.NOT_FOUND, "Text-to-Speech not found");
}
}
HumanLanguageInterpreter hli = null;
if (hliId != null) {
hli = voiceManager.getHLI(hliId);
if (hli == null) {
List<HumanLanguageInterpreter> interpreters = List.of();
if (hliIds != null) {
interpreters = voiceManager.getHLIsByIds(hliIds);
if (interpreters.isEmpty()) {
return JSONResponse.createErrorResponse(Status.NOT_FOUND, "Interpreter not found");
}
}
Expand All @@ -281,7 +296,7 @@ public Response startDialog(
final Locale locale = localeService.getLocale(language);

try {
voiceManager.startDialog(ks, stt, tts, hli, source, sink, locale, keyword, listeningItem);
voiceManager.startDialog(ks, stt, tts, interpreters, source, sink, locale, keyword, listeningItem);
return Response.ok(null, MediaType.TEXT_PLAIN).build();
} catch (IllegalStateException e) {
return JSONResponse.createErrorResponse(Status.BAD_REQUEST, e.getMessage());
Expand Down Expand Up @@ -325,7 +340,7 @@ public Response listenAndAnswer(
@QueryParam("sourceId") @Parameter(description = "source ID") @Nullable String sourceId,
@QueryParam("sttId") @Parameter(description = "Speech-to-Text ID") @Nullable String sttId,
@QueryParam("ttsId") @Parameter(description = "Text-to-Speech ID") @Nullable String ttsId,
@QueryParam("hliId") @Parameter(description = "interpreter ID") @Nullable String hliId,
@QueryParam("hliIds") @Parameter(description = "interpreter IDs") @Nullable List<String> hliIds,
@QueryParam("sinkId") @Parameter(description = "audio sink ID") @Nullable String sinkId,
@QueryParam("listeningItem") @Parameter(description = "listening item") @Nullable String listeningItem) {
AudioSource source = null;
Expand All @@ -349,10 +364,10 @@ public Response listenAndAnswer(
return JSONResponse.createErrorResponse(Status.NOT_FOUND, "Text-to-Speech not found");
}
}
HumanLanguageInterpreter hli = null;
if (hliId != null) {
hli = voiceManager.getHLI(hliId);
if (hli == null) {
List<HumanLanguageInterpreter> interpreters = List.of();
if (hliIds != null) {
interpreters = voiceManager.getHLIsByIds(hliIds);
if (interpreters.isEmpty()) {
return JSONResponse.createErrorResponse(Status.NOT_FOUND, "Interpreter not found");
}
}
Expand All @@ -366,7 +381,7 @@ public Response listenAndAnswer(
final Locale locale = localeService.getLocale(language);

try {
voiceManager.listenAndAnswer(stt, tts, hli, source, sink, locale, listeningItem);
voiceManager.listenAndAnswer(stt, tts, interpreters, source, sink, locale, listeningItem);
return Response.ok(null, MediaType.TEXT_PLAIN).build();
} catch (IllegalStateException e) {
return JSONResponse.createErrorResponse(Status.BAD_REQUEST, e.getMessage());
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,6 +12,8 @@
*/
package org.openhab.core.model.script.actions;

import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Objects;

Expand Down Expand Up @@ -157,14 +159,14 @@ public static String interpret(@ParamDoc(name = "text") Object text) {
* In case of interpretation error, the error message is played using the default audio sink.
*
* @param text The text to interpret
* @param interpreter The Human Language Interpreter to be used
* @param interpreters Comma separated list of human language text interpreters to use
*/
@ActionDoc(text = "interprets a given text by a given human language interpreter", returns = "human language response")
@ActionDoc(text = "interprets a given text by a given human language interpreter(s)", returns = "human language response")
public static String interpret(@ParamDoc(name = "text") Object text,
@ParamDoc(name = "interpreter") @Nullable String interpreter) {
@ParamDoc(name = "interpreters") @Nullable String interpreters) {
String response;
try {
response = VoiceActionService.voiceManager.interpret(text.toString(), interpreter);
response = VoiceActionService.voiceManager.interpret(text.toString(), interpreters);
} catch (InterpretationException e) {
String message = Objects.requireNonNullElse(e.getMessage(), "");
say(message);
Expand All @@ -180,15 +182,15 @@ public static String interpret(@ParamDoc(name = "text") Object text,
* If sink parameter is null, the error message is simply not played.
*
* @param text The text to interpret
* @param interpreter The Human Language Interpreter to be used
* @param interpreters Comma separated list of human language text interpreters to use
* @param sink The name of audio sink to be used to play the error message
*/
@ActionDoc(text = "interprets a given text by a given human language interpreter", returns = "human language response")
@ActionDoc(text = "interprets a given text by a given human language interpreter(s)", returns = "human language response")
public static String interpret(@ParamDoc(name = "text") Object text,
@ParamDoc(name = "interpreter") String interpreter, @ParamDoc(name = "sink") @Nullable String sink) {
@ParamDoc(name = "interpreters") String interpreters, @ParamDoc(name = "sink") @Nullable String sink) {
String response;
try {
response = VoiceActionService.voiceManager.interpret(text.toString(), interpreter);
response = VoiceActionService.voiceManager.interpret(text.toString(), interpreters);
} catch (InterpretationException e) {
String message = Objects.requireNonNullElse(e.getMessage(), "");
if (sink != null) {
Expand Down Expand Up @@ -218,18 +220,18 @@ public static void startDialog(@ParamDoc(name = "source") @Nullable String sourc
* @param ks the keyword spotting service to use or null to use the default service
* @param stt the speech-to-text service to use or null to use the default service
* @param tts the text-to-speech service to use or null to use the default service
* @param interpreter the human language text interpreter to use or null to use the default service
* @param interpreters comma separated list of human language text interpreters to use or null to use the default service
* @param source the name of audio source to use or null to use the default source
* @param sink the name of audio sink to use or null to use the default sink
* @param Locale the locale to use or null to use the default locale
* @param locale the locale to use or null to use the default locale
* @param keyword the keyword to use during keyword spotting or null to use the default keyword
* @param listeningItem the item to switch ON while listening to a question
*/
@ActionDoc(text = "starts dialog processing for a given audio source")
public static void startDialog(@ParamDoc(name = "keyword spotting service") @Nullable String ks,
@ParamDoc(name = "speech-to-text service") @Nullable String stt,
@ParamDoc(name = "text-to-speech service") @Nullable String tts,
@ParamDoc(name = "interpreter") @Nullable String interpreter,
@ParamDoc(name = "interpreters") @Nullable String interpreters,
@ParamDoc(name = "source") @Nullable String source, @ParamDoc(name = "sink") @Nullable String sink,
@ParamDoc(name = "locale") @Nullable String locale, @ParamDoc(name = "keyword") @Nullable String keyword,
@ParamDoc(name = "listening item") @Nullable String listeningItem) {
Expand Down Expand Up @@ -265,11 +267,11 @@ public static void startDialog(@ParamDoc(name = "keyword spotting service") @Nul
return;
}
}
HumanLanguageInterpreter hliService = null;
if (interpreter != null) {
hliService = VoiceActionService.voiceManager.getHLI(interpreter);
if (hliService == null) {
logger.warn("Failed starting dialog processing: interpreter '{}' not found", interpreter);
List<HumanLanguageInterpreter> hliServices = List.of();
if (interpreters != null) {
hliServices = VoiceActionService.voiceManager.getHLIsByIds(interpreters);
if (hliServices.isEmpty()) {
logger.warn("Failed starting dialog processing: interpreters '{}' not found", interpreters);
return;
}
}
Expand All @@ -292,7 +294,7 @@ public static void startDialog(@ParamDoc(name = "keyword spotting service") @Nul
}

try {
VoiceActionService.voiceManager.startDialog(ksService, sttService, ttsService, hliService, audioSource,
VoiceActionService.voiceManager.startDialog(ksService, sttService, ttsService, hliServices, audioSource,
audioSink, loc, keyword, listeningItem);
} catch (IllegalStateException e) {
logger.warn("Failed starting dialog processing: {}", e.getMessage());
Expand Down Expand Up @@ -339,16 +341,16 @@ public static void listenAndAnswer(@ParamDoc(name = "source") @Nullable String s
*
* @param stt the speech-to-text service to use or null to use the default service
* @param tts the text-to-speech service to use or null to use the default service
* @param interpreter the human language text interpreter to use or null to use the default service
* @param interpreters comma separated list of human language text interpreters to use or null to use the default service
* @param source the name of audio source to use or null to use the default source
* @param sink the name of audio sink to use or null to use the default sink
* @param Locale the locale to use or null to use the default locale
* @param locale the locale to use or null to use the default locale
* @param listeningItem the item to switch ON while listening to a question
*/
@ActionDoc(text = "executes a simple dialog sequence without keyword spotting for a given audio source")
public static void listenAndAnswer(@ParamDoc(name = "speech-to-text service") @Nullable String stt,
@ParamDoc(name = "text-to-speech service") @Nullable String tts,
@ParamDoc(name = "interpreter") @Nullable String interpreter,
@ParamDoc(name = "interpreters") @Nullable String interpreters,
@ParamDoc(name = "source") @Nullable String source, @ParamDoc(name = "sink") @Nullable String sink,
@ParamDoc(name = "locale") @Nullable String locale,
@ParamDoc(name = "listening item") @Nullable String listeningItem) {
Expand Down Expand Up @@ -376,11 +378,11 @@ public static void listenAndAnswer(@ParamDoc(name = "speech-to-text service") @N
return;
}
}
HumanLanguageInterpreter hliService = null;
if (interpreter != null) {
hliService = VoiceActionService.voiceManager.getHLI(interpreter);
if (hliService == null) {
logger.warn("Failed executing simple dialog: interpreter '{}' not found", interpreter);
List<HumanLanguageInterpreter> hliServices = List.of();
if (interpreters != null) {
hliServices = VoiceActionService.voiceManager.getHLIsByIds(interpreters);
if (hliServices.isEmpty()) {
logger.warn("Failed executing simple dialog: interpreters '{}' not found", interpreters);
return;
}
}
Expand All @@ -403,7 +405,7 @@ public static void listenAndAnswer(@ParamDoc(name = "speech-to-text service") @N
}

try {
VoiceActionService.voiceManager.listenAndAnswer(sttService, ttsService, hliService, audioSource, audioSink,
VoiceActionService.voiceManager.listenAndAnswer(sttService, ttsService, hliServices, audioSource, audioSink,
loc, listeningItem);
} catch (IllegalStateException e) {
logger.warn("Failed executing simple dialog: {}", e.getMessage());
Expand Down
Loading

0 comments on commit 5516e59

Please sign in to comment.