From 877b2722aa118f4f164d9dad28a496dcf45ca3d9 Mon Sep 17 00:00:00 2001 From: Gwendal Roulleau Date: Sat, 8 Jul 2023 15:21:29 +0200 Subject: [PATCH 1/2] [googletts] Replace custom TTS cache with common TTS cache Signed-off-by: Gwendal Roulleau --- bundles/org.openhab.voice.googletts/README.md | 8 +- .../googletts/internal/GoogleCloudAPI.java | 124 +----------------- .../googletts/internal/GoogleTTSConfig.java | 7 +- .../googletts/internal/GoogleTTSService.java | 59 +++++---- .../main/resources/OH-INF/config/config.xml | 8 -- .../OH-INF/i18n/googletts.properties | 2 - 6 files changed, 38 insertions(+), 170 deletions(-) diff --git a/bundles/org.openhab.voice.googletts/README.md b/bundles/org.openhab.voice.googletts/README.md index e704945f10d80..8822dc4e3da02 100644 --- a/bundles/org.openhab.voice.googletts/README.md +++ b/bundles/org.openhab.voice.googletts/README.md @@ -2,8 +2,7 @@ Google Cloud TTS Service uses the non-free Google Cloud Text-to-Speech API to convert text or Speech Synthesis Markup Language (SSML) input into audio data of natural human speech. It provides multiple voices, available in different languages and variants and applies DeepMind’s groundbreaking research in WaveNet and Google’s powerful neural networks. -The implementation caches the converted texts to reduce the load on the API and make the conversion faster. -You can find them in the `$OPENHAB_USERDATA/cache/org.openhab.voice.googletts` folder. +The Google Cloud TTS service uses the openHAB TTS cache to cache audio files produced from the most recent queries in order to reduce traffic, improve performance and reduce number of requests. Be aware, that using this service may incur cost on your Google Cloud account. You can find pricing information on the [documentation page](https://cloud.google.com/text-to-speech/#pricing-summary). @@ -47,10 +46,6 @@ It is recommended to clear this configuration parameter afterwards. * **Pitch** - The pitch of selected voice, up to 20 semitones. * **Volume Gain** - The volume of the output between 16dB and -96dB. * **Speaking Rate** - The speaking rate can be 4x faster or slower than the normal rate. -* **Purge Cache** - Purges the cache e.g. after testing different voice configuration parameters. - -When enabled the cache is purged once. -Make sure to disable this setting again so the cache is maintained after restarts. In case you would like to setup the service via a text file, create a new file in `$OPENHAB_ROOT/conf/services` named `googletts.cfg` @@ -63,7 +58,6 @@ org.openhab.voice.googletts:authcode=XXXXX org.openhab.voice.googletts:pitch=0 org.openhab.voice.googletts:volumeGain=0 org.openhab.voice.googletts:speakingRate=1 -org.openhab.voice.googletts:purgeCache=false ``` ### Default Text-to-Speech and Voice Configuration diff --git a/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleCloudAPI.java b/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleCloudAPI.java index 692d471701489..7fc039187e7bc 100644 --- a/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleCloudAPI.java +++ b/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleCloudAPI.java @@ -12,17 +12,9 @@ */ package org.openhab.voice.googletts.internal; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; -import java.math.BigInteger; import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Base64; import java.util.Dictionary; import java.util.HashMap; @@ -69,10 +61,6 @@ */ class GoogleCloudAPI { - private static final char EXTENSION_SEPARATOR = '.'; - private static final char UNIX_SEPARATOR = '/'; - private static final char WINDOWS_SEPARATOR = '\\'; - private static final String BEARER = "Bearer "; private static final String GCP_AUTH_URI = "https://accounts.google.com/o/oauth2/auth"; @@ -103,11 +91,6 @@ class GoogleCloudAPI { */ private final Map> voices = new HashMap<>(); - /** - * Cache folder - */ - private File cacheFolder; - /** * Configuration */ @@ -122,12 +105,10 @@ class GoogleCloudAPI { /** * Constructor. * - * @param cacheFolder Service cache folder */ - GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory, File cacheFolder) { + GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory) { this.configAdmin = configAdmin; this.oAuthFactory = oAuthFactory; - this.cacheFolder = cacheFolder; } /** @@ -161,15 +142,6 @@ void setConfig(GoogleTTSConfig config) { } else { voices.clear(); } - - // maintain cache - if (config.purgeCache) { - File[] files = cacheFolder.listFiles(); - if (files != null && files.length > 0) { - Arrays.stream(files).forEach(File::delete); - } - logger.debug("Cache purged."); - } } public void dispose() { @@ -341,34 +313,21 @@ private List listVoices() throws AuthenticationException, Commun * @param codec Requested codec * @return String array of Google audio format and the file extension to use. */ - private String[] getFormatForCodec(String codec) { + private String getFormatForCodec(String codec) { switch (codec) { case AudioFormat.CODEC_MP3: - return new String[] { AudioEncoding.MP3.toString(), "mp3" }; + return AudioEncoding.MP3.toString(); case AudioFormat.CODEC_PCM_SIGNED: - return new String[] { AudioEncoding.LINEAR16.toString(), "wav" }; + return AudioEncoding.LINEAR16.toString(); default: throw new IllegalArgumentException("Audio format " + codec + " is not yet supported"); } } public byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) { - String[] format = getFormatForCodec(codec); - String fileNameInCache = getUniqueFilenameForText(text, voice.getTechnicalName()); - File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + format[1]); + String format = getFormatForCodec(codec); try { - // check if in cache - if (audioFileInCache.exists()) { - logger.debug("Audio file {} was found in cache.", audioFileInCache.getName()); - return Files.readAllBytes(audioFileInCache.toPath()); - } - - // if not in cache, get audio data and put to cache - byte[] audio = synthesizeSpeechByGoogle(text, voice, format[0]); - if (audio != null) { - saveAudioAndTextToFile(text, audioFileInCache, audio, voice.getTechnicalName()); - } - return audio; + return synthesizeSpeechByGoogle(text, voice, format); } catch (AuthenticationException | CommunicationException e) { logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage()); if (oAuthService != null) { @@ -376,62 +335,10 @@ public byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) oAuthService = null; } voices.clear(); - } catch (FileNotFoundException e) { - logger.warn("Could not write file {} to cache: {}", audioFileInCache, e.getMessage()); - } catch (IOException e) { - logger.debug("An unexpected IOException occurred: {}", e.getMessage()); } return null; } - /** - * Create cache entry. - * - * @param text Converted text. - * @param cacheFile Cache entry file. - * @param audio Byte array of the audio. - * @param voiceName Used voice - * @throws FileNotFoundException - * @throws IOException in case of file handling exceptions - */ - private void saveAudioAndTextToFile(String text, File cacheFile, byte[] audio, String voiceName) - throws IOException, FileNotFoundException { - logger.debug("Caching audio file {}", cacheFile.getName()); - try (FileOutputStream audioFileOutputStream = new FileOutputStream(cacheFile)) { - audioFileOutputStream.write(audio); - } - - // write text to file for transparency too - // this allows to know which contents is in which audio file - String textFileName = removeExtension(cacheFile.getName()) + ".txt"; - logger.debug("Caching text file {}", textFileName); - try (FileOutputStream textFileOutputStream = new FileOutputStream(new File(cacheFolder, textFileName))) { - // @formatter:off - StringBuilder sb = new StringBuilder("Config: ") - .append(config.toConfigString()) - .append(",voice=") - .append(voiceName) - .append(System.lineSeparator()) - .append("Text: ") - .append(text) - .append(System.lineSeparator()); - // @formatter:on - textFileOutputStream.write(sb.toString().getBytes(StandardCharsets.UTF_8)); - } - } - - /** - * Removes the extension of a file name. - * - * @param fileName the file name to remove the extension of - * @return the filename without the extension - */ - private String removeExtension(String fileName) { - int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR); - int lastSeparator = Math.max(fileName.lastIndexOf(UNIX_SEPARATOR), fileName.lastIndexOf(WINDOWS_SEPARATOR)); - return lastSeparator > extensionPos ? fileName : fileName.substring(0, extensionPos); - } - /** * Call Google service to synthesize the required text * @@ -476,25 +383,6 @@ private byte[] synthesizeSpeechByGoogle(String text, GoogleTTSVoice voice, Strin return null; } - /** - * Gets a unique filename for a give text, by creating a MD5 hash of it. It - * will be preceded by the locale. - *

- * Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3" - */ - private String getUniqueFilenameForText(String text, String voiceName) { - try { - MessageDigest md = MessageDigest.getInstance("MD5"); - byte[] bytesOfMessage = (config.toConfigString() + text).getBytes(StandardCharsets.UTF_8); - String fileNameHash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage))); - return voiceName + "_" + fileNameHash; - } catch (NoSuchAlgorithmException e) { - // should not happen - logger.error("Could not create MD5 hash for '{}'", text, e); - return null; - } - } - boolean isInitialized() { return oAuthService != null; } diff --git a/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSConfig.java b/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSConfig.java index 2a3291797c5bc..c8fcada8ccf26 100644 --- a/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSConfig.java +++ b/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSConfig.java @@ -44,15 +44,10 @@ class GoogleTTSConfig { */ public Double speakingRate = 1d; - /** - * Purge cache after configuration changes. - */ - public Boolean purgeCache = Boolean.FALSE; - @Override public String toString() { return "GoogleTTSConfig{pitch=" + pitch + ", speakingRate=" + speakingRate + ", volumeGainDb=" + volumeGainDb - + ", purgeCache=" + purgeCache + '}'; + + '}'; } String toConfigString() { diff --git a/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java b/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java index 66f682dafbd93..b586100e02a67 100644 --- a/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java +++ b/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java @@ -15,24 +15,29 @@ import static org.openhab.voice.googletts.internal.GoogleTTSService.*; import java.io.ByteArrayInputStream; -import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.Collections; import java.util.HashSet; import java.util.Locale; import java.util.Map; import java.util.Set; +import org.eclipse.jdt.annotation.NonNull; import org.eclipse.jdt.annotation.NonNullByDefault; import org.eclipse.jdt.annotation.Nullable; -import org.openhab.core.OpenHAB; import org.openhab.core.audio.AudioFormat; import org.openhab.core.audio.AudioStream; import org.openhab.core.audio.ByteArrayAudioStream; import org.openhab.core.audio.utils.AudioWaveUtils; import org.openhab.core.auth.client.oauth2.OAuthFactory; import org.openhab.core.config.core.ConfigurableService; +import org.openhab.core.voice.AbstractCachedTTSService; +import org.openhab.core.voice.TTSCache; import org.openhab.core.voice.TTSException; import org.openhab.core.voice.TTSService; import org.openhab.core.voice.Voice; @@ -52,10 +57,11 @@ * * @author Gabor Bicskei - Initial contribution */ -@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID) +@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + + SERVICE_PID, service = TTSService.class) @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME + " Text-to-Speech", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID) -public class GoogleTTSService implements TTSService { +public class GoogleTTSService extends AbstractCachedTTSService { /** * Service name */ @@ -76,11 +82,6 @@ public class GoogleTTSService implements TTSService { */ static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID; - /** - * Cache folder under $userdata - */ - private static final String CACHE_FOLDER_NAME = "cache"; - /** * Configuration parameters */ @@ -90,7 +91,6 @@ public class GoogleTTSService implements TTSService { private static final String PARAM_PITCH = "pitch"; private static final String PARAM_SPEAKING_RATE = "speakingRate"; private static final String PARAM_VOLUME_GAIN_DB = "volumeGainDb"; - private static final String PARAM_PURGE_CACHE = "purgeCache"; /** * Logger. @@ -117,8 +117,9 @@ public class GoogleTTSService implements TTSService { private final GoogleTTSConfig config = new GoogleTTSConfig(); @Activate - public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin, - final @Reference OAuthFactory oAuthFactory) { + public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin, final @Reference OAuthFactory oAuthFactory, + @Reference TTSCache ttsCache, Map config) { + super(ttsCache); this.configAdmin = configAdmin; this.oAuthFactory = oAuthFactory; } @@ -128,15 +129,7 @@ public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin, */ @Activate protected void activate(Map config) { - // create cache folder - File userData = new File(OpenHAB.getUserDataFolder()); - File cacheFolder = new File(new File(userData, CACHE_FOLDER_NAME), SERVICE_PID); - if (!cacheFolder.exists()) { - cacheFolder.mkdirs(); - } - logger.debug("Using cache folder {}", cacheFolder.getAbsolutePath()); - - apiImpl = new GoogleCloudAPI(configAdmin, oAuthFactory, cacheFolder); + apiImpl = new GoogleCloudAPI(configAdmin, oAuthFactory); updateConfig(config); } @@ -236,13 +229,6 @@ private void updateConfig(Map newConfig) { config.volumeGainDb = Double.parseDouble(param); } - // purgeCache - param = newConfig.containsKey(PARAM_PURGE_CACHE) ? newConfig.get(PARAM_PURGE_CACHE).toString() : null; - if (param != null) { - config.purgeCache = Boolean.parseBoolean(param); - } - logger.trace("New configuration: {}", config.toString()); - if (config.clientId != null && !config.clientId.isEmpty() && config.clientSecret != null && !config.clientSecret.isEmpty()) { apiImpl.setConfig(config); @@ -313,7 +299,7 @@ public Set getSupportedFormats() { * @throws TTSException in case the service is unavailable or a parameter is invalid. */ @Override - public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException { + public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException { logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat); // Validate known api key if (!apiImpl.isInitialized()) { @@ -361,4 +347,19 @@ private AudioFormat parseAudioFormat(byte[] audio) throws TTSException { throw new TTSException("Cannot parse WAV format", e); } } + + @Override + public @NonNull String getCacheKey(@NonNull String text, @NonNull Voice voice, + @NonNull AudioFormat requestedFormat) { + try { + MessageDigest md = MessageDigest.getInstance("MD5"); + byte[] bytesOfMessage = (config.toConfigString() + text + requestedFormat).getBytes(StandardCharsets.UTF_8); + String hash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage))); + return ((GoogleTTSVoice) voice).getTechnicalName() + "_" + hash; + } catch (NoSuchAlgorithmException e) { + // should not happen + logger.error("Could not create MD5 hash for '{}'", text, e); + return "nomd5algorithm"; + } + } } diff --git a/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/config/config.xml b/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/config/config.xml index f92ce5b80aa13..1e7b9e2267f0f 100644 --- a/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/config/config.xml +++ b/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/config/config.xml @@ -43,13 +43,5 @@ Speaking rate can be 4x faster or slower than the normal rate. 1 - - true - - Purges the cache e.g. after testing different voice configuration parameters. When enabled the cache is - purged once. Make sure to disable this setting again so the cache is maintained after restarts. - false - - diff --git a/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/i18n/googletts.properties b/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/i18n/googletts.properties index 2f3850d252cc9..d37b41bc6b7d2 100644 --- a/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/i18n/googletts.properties +++ b/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/i18n/googletts.properties @@ -10,8 +10,6 @@ voice.config.googletts.group.tts.label = TTS Configuration voice.config.googletts.group.tts.description = Parameters for Google Cloud TTS API. voice.config.googletts.pitch.label = Pitch voice.config.googletts.pitch.description = Customize the pitch of your selected voice, up to 20 semitones more or less than the default output. -voice.config.googletts.purgeCache.label = Purge Cache -voice.config.googletts.purgeCache.description = Purges the cache e.g. after testing different voice configuration parameters. When enabled the cache is purged once. Make sure to disable this setting again so the cache is maintained after restarts. voice.config.googletts.speakingRate.label = Speaking Rate voice.config.googletts.speakingRate.description = Speaking rate can be 4x faster or slower than the normal rate. voice.config.googletts.volumeGain.label = Volume Gain From 90bac58589e2ee989b14324f2ddab1ec1d238910 Mon Sep 17 00:00:00 2001 From: Gwendal Roulleau Date: Mon, 10 Jul 2023 22:10:50 +0200 Subject: [PATCH 2/2] [googletts] Replace custom TTS cache with common TTS cache Apply code review Signed-off-by: Gwendal Roulleau --- .../org/openhab/voice/googletts/internal/GoogleTTSService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java b/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java index b586100e02a67..d03e72c8d0d2e 100644 --- a/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java +++ b/bundles/org.openhab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java @@ -358,7 +358,7 @@ private AudioFormat parseAudioFormat(byte[] audio) throws TTSException { return ((GoogleTTSVoice) voice).getTechnicalName() + "_" + hash; } catch (NoSuchAlgorithmException e) { // should not happen - logger.error("Could not create MD5 hash for '{}'", text, e); + logger.warn("Could not create MD5 hash for '{}'", text, e); return "nomd5algorithm"; } }