[voicerss] Add support for WAV audio format (openhab#11916)

* [voicerss] add unit test for supported formats
* [voicerss] add support for WAV audio format

Signed-off-by: Andreas Brenk <mail@andreasbrenk.com>
andan67 · Nov 5, 2022 · 9fc9e4c · 9fc9e4c
1 parent 9279dd1
commit 9fc9e4c
Show file tree

Hide file tree

Showing 7 changed files with 263 additions and 71 deletions.
diff --git a/....voice.voicerss/src/main/java/org/openhab/voice/voicerss/internal/VoiceRSSTTSService.java b/....voice.voicerss/src/main/java/org/openhab/voice/voicerss/internal/VoiceRSSTTSService.java
@@ -50,6 +50,20 @@ public class VoiceRSSTTSService implements TTSService {
 
     // API Key comes from ConfigAdmin
     private static final String CONFIG_API_KEY = "apiKey";
+
+    /**
+     * Map from openHAB AudioFormat codec to the VoiceRSS API audio codec; every PCM variant is requested as WAV
+     */
+    private static final Map<String, String> CODEC_MAP = Map.of(AudioFormat.CODEC_PCM_SIGNED, "WAV",
+            AudioFormat.CODEC_PCM_UNSIGNED, "WAV", AudioFormat.CODEC_PCM_ALAW, "WAV", AudioFormat.CODEC_PCM_ULAW, "WAV",
+            AudioFormat.CODEC_MP3, "MP3", AudioFormat.CODEC_VORBIS, "OGG", AudioFormat.CODEC_AAC, "AAC");
+
+    /**
+     * Map from openHAB AudioFormat frequency (in Hz) to the frequency token of the VoiceRSS API audio format
+     */
+    private static final Map<Long, String> FREQUENCY_MAP = Map.of(8_000L, "8khz", 11_025L, "11khz", 12_000L, "12khz",
+            16_000L, "16khz", 22_050L, "22khz", 24_000L, "24khz", 32_000L, "32khz", 44_100L, "44khz", 48_000L, "48khz");
+
     private String apiKey;
 
     private final Logger logger = LoggerFactory.getLogger(VoiceRSSTTSService.class);
@@ -121,22 +135,12 @@ public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFor
         if (!voices.contains(voice)) {
             throw new TTSException("The passed voice is unsupported");
         }
-        boolean isAudioFormatSupported = false;
-        for (AudioFormat currentAudioFormat : audioFormats) {
-            if (currentAudioFormat.isCompatible(requestedFormat)) {
-                isAudioFormatSupported = true;
-                break;
-            }
-        }
-        if (!isAudioFormatSupported) {
-            throw new TTSException("The passed AudioFormat is unsupported");
-        }
 
-        // now create the input stream for given text, locale, format. There is
-        // only a default voice
+        // now create the input stream for given text, locale, voice, codec and format.
         try {
             File cacheAudioFile = voiceRssImpl.getTextToSpeechAsFile(apiKey, trimmedText,
-                    voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioFormat(requestedFormat));
+                    voice.getLocale().toLanguageTag(), voice.getLabel(), getApiAudioCodec(requestedFormat),
+                    getApiAudioFormat(requestedFormat));
             if (cacheAudioFile == null) {
                 throw new TTSException("Could not read from VoiceRSS service");
             }
@@ -169,46 +173,53 @@ private Set<Voice> initVoices() {
      * @return The audio formats of this instance
      */
     private Set<AudioFormat> initAudioFormats() {
-        Set<AudioFormat> audioFormats = new HashSet<>();
-        for (String format : voiceRssImpl.getAvailableAudioFormats()) {
-            audioFormats.add(getAudioFormat(format));
-        }
-        return audioFormats;
+        return voiceRssImpl.getAvailableAudioFormats();
     }
 
-    private AudioFormat getAudioFormat(String apiFormat) {
-        Boolean bigEndian = null;
-        Integer bitDepth = 16;
-        Integer bitRate = null;
-        Long frequency = 44100L;
-
-        if ("MP3".equals(apiFormat)) {
-            // we use by default: MP3, 44khz_16bit_mono with bitrate 64 kbps
-            bitRate = 64000;
-            return new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, bigEndian, bitDepth, bitRate,
-                    frequency);
-        } else if ("OGG".equals(apiFormat)) {
-            // we use by default: OGG, 44khz_16bit_mono
-            return new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, bigEndian, bitDepth, bitRate,
-                    frequency);
-        } else if ("AAC".equals(apiFormat)) {
-            // we use by default: AAC, 44khz_16bit_mono
-            return new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, bigEndian, bitDepth, bitRate,
-                    frequency);
-        } else {
-            throw new IllegalArgumentException("Audio format " + apiFormat + " not yet supported");
+    /**
+     * Map {@link AudioFormat#getCodec() codec} to VoiceRSS API codec; a {@code null} codec is treated as signed PCM.
+     *
+     * @throws TTSException if the codec of {@code format} has no entry in {@code CODEC_MAP}
+     */
+    private String getApiAudioCodec(AudioFormat format) throws TTSException {
+        final String internalCodec = format.getCodec();
+        final String apiCodec = CODEC_MAP.get(internalCodec != null ? internalCodec : AudioFormat.CODEC_PCM_SIGNED);
+
+        if (apiCodec == null) {
+            throw new TTSException("Unsupported audio format: " + format);
         }
+
+        return apiCodec;
     }
 
-    private String getApiAudioFormat(AudioFormat format) {
-        if (format.getCodec().equals(AudioFormat.CODEC_MP3)) {
-            return "MP3";
-        } else if (format.getCodec().equals(AudioFormat.CODEC_VORBIS)) {
-            return "OGG";
-        } else if (format.getCodec().equals(AudioFormat.CODEC_AAC)) {
-            return "AAC";
-        } else {
-            throw new IllegalArgumentException("Audio format " + format.getCodec() + " not yet supported");
+    /**
+     * Map {@link AudioFormat#getBitDepth() bit depth} and {@link AudioFormat#getFrequency() frequency} to VoiceRSS API
+     * format, e.g. {@code 44khz_16bit_mono} or {@code alaw_8khz_mono}. Defaults: 16 bit, 44.1 kHz, signed PCM.
+     *
+     * @throws TTSException if {@code format} is not supported
+     */
+    private String getApiAudioFormat(AudioFormat format) throws TTSException {
+        final int bitDepth = format.getBitDepth() != null ? format.getBitDepth() : 16;
+        final Long frequency = format.getFrequency() != null ? format.getFrequency() : 44_100L;
+        final String apiFrequency = FREQUENCY_MAP.get(frequency);
+
+        if (apiFrequency == null || (bitDepth != 8 && bitDepth != 16)) {
+            throw new TTSException("Unsupported audio format: " + format);
+        }
+
+        switch (format.getCodec() != null ? format.getCodec() : AudioFormat.CODEC_PCM_SIGNED) {
+            case AudioFormat.CODEC_PCM_ALAW:
+                return "alaw_" + apiFrequency + "_mono";
+            case AudioFormat.CODEC_PCM_ULAW:
+                return "ulaw_" + apiFrequency + "_mono";
+            case AudioFormat.CODEC_PCM_SIGNED:
+            case AudioFormat.CODEC_PCM_UNSIGNED:
+            case AudioFormat.CODEC_MP3:
+            case AudioFormat.CODEC_VORBIS:
+            case AudioFormat.CODEC_AAC:
+                return apiFrequency + "_" + bitDepth + "bit_mono"; // VoiceRSS token, e.g. 44khz_16bit_mono
+            default:
+                throw new TTSException("Unsupported audio format: " + format);
         }
     }
 

diff --git a/...s/src/main/java/org/openhab/voice/voicerss/internal/cloudapi/CachedVoiceRSSCloudImpl.java b/...s/src/main/java/org/openhab/voice/voicerss/internal/cloudapi/CachedVoiceRSSCloudImpl.java
@@ -22,6 +22,7 @@
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.Objects;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -55,17 +56,17 @@ public CachedVoiceRSSCloudImpl(String cacheFolderName) {
         }
     }
 
-    public File getTextToSpeechAsFile(String apiKey, String text, String locale, String voice, String audioFormat)
-            throws IOException {
-        String fileNameInCache = getUniqueFilenameForText(text, locale, voice);
+    public File getTextToSpeechAsFile(String apiKey, String text, String locale, String voice, String audioCodec,
+            String audioFormat) throws IOException {
+        String fileNameInCache = getUniqueFilenameForText(text, locale, voice, audioFormat);
         // check if in cache
-        File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + audioFormat.toLowerCase());
+        File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + audioCodec.toLowerCase());
         if (audioFileInCache.exists()) {
             return audioFileInCache;
         }
 
         // if not in cache, get audio data and put to cache
-        try (InputStream is = super.getTextToSpeech(apiKey, text, locale, voice, audioFormat);
+        try (InputStream is = super.getTextToSpeech(apiKey, text, locale, voice, audioCodec, audioFormat);
                 FileOutputStream fos = new FileOutputStream(audioFileInCache)) {
             copyStream(is, fos);
             // write text to file for transparency too
@@ -85,11 +86,12 @@ public File getTextToSpeechAsFile(String apiKey, String text, String locale, Str
 
     /**
      * Gets a unique filename for a given text, by creating an MD5 hash of it. It
-     * will be preceded by the locale.
+     * will be preceded by the locale and suffixed by the format if it is not the
+     * default of "44khz_16bit_mono".
      *
      * Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
      */
-    private String getUniqueFilenameForText(String text, String locale, String voice) {
+    private String getUniqueFilenameForText(String text, String locale, String voice, String format) {
         try {
             byte[] bytesOfMessage = text.getBytes(StandardCharsets.UTF_8);
             MessageDigest md = MessageDigest.getInstance("MD5");
@@ -106,6 +108,9 @@ private String getUniqueFilenameForText(String text, String locale, String voice
                 filename += voice + "_";
             }
             filename += hashtext;
+            if (!Objects.equals(format, "44khz_16bit_mono")) {
+                filename += "_" + format;
+            }
             return filename;
         } catch (NoSuchAlgorithmException ex) {
             // should not happen

diff --git a/...voicerss/src/main/java/org/openhab/voice/voicerss/internal/cloudapi/VoiceRSSCloudAPI.java b/...voicerss/src/main/java/org/openhab/voice/voicerss/internal/cloudapi/VoiceRSSCloudAPI.java
@@ -41,7 +41,7 @@ public interface VoiceRSSCloudAPI {
      *
      * @return A set of all audio formats supported
      */
-    Set<String> getAvailableAudioFormats();
+    Set<AudioFormat> getAvailableAudioFormats();
 
     /**
      * Get all supported voices.
@@ -70,13 +70,15 @@ public interface VoiceRSSCloudAPI {
      *            the locale to use
      * @param voice
      *            the voice to use, "default" for the default voice
+     * @param audioCodec
+     *            the audio codec to use
      * @param audioFormat
      *            the audio format to use
      * @return an InputStream to the audio data in specified format
      * @throws IOException
      *             will be raised if the audio data can not be retrieved from
      *             cloud service
      */
-    InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
-            throws IOException;
+    InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioCodec,
+            String audioFormat) throws IOException;
 }
diff --git a/...oicerss/src/main/java/org/openhab/voice/voicerss/internal/cloudapi/VoiceRSSCloudImpl.java b/...oicerss/src/main/java/org/openhab/voice/voicerss/internal/cloudapi/VoiceRSSCloudImpl.java
@@ -12,8 +12,6 @@
  */
 package org.openhab.voice.voicerss.internal.cloudapi;
 
-import static java.util.stream.Collectors.toSet;
-
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.HttpURLConnection;
@@ -28,8 +26,8 @@
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
-import java.util.stream.Stream;
 
+import org.openhab.core.audio.AudioFormat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -41,21 +39,51 @@
  * <ul>
  * <li>All API languages supported</li>
  * <li>Only default voice supported with good audio quality</li>
- * <li>Only MP3, OGG and AAC audio formats supported</li>
+ * <li>MP3, OGG, AAC and WAV audio formats supported</li>
  * <li>It uses HTTP and not HTTPS (for performance reasons)</li>
  * </ul>
  *
  * @author Jochen Hiller - Initial contribution
  * @author Laurent Garnier - add support for all API languages
  * @author Laurent Garnier - add support for OGG and AAC audio formats
+ * @author Andreas Brenk - add support for WAV audio format
  */
 public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
 
     public static final String DEFAULT_VOICE = "default";
 
     private final Logger logger = LoggerFactory.getLogger(VoiceRSSCloudImpl.class);
 
-    private static final Set<String> SUPPORTED_AUDIO_FORMATS = Stream.of("MP3", "OGG", "AAC").collect(toSet());
+    private static final Set<AudioFormat> SUPPORTED_AUDIO_FORMATS = Set.of( // WAV bit rate = frequency * bit depth
+            new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, null, 44_100L),
+            new AudioFormat(AudioFormat.CONTAINER_OGG, AudioFormat.CODEC_VORBIS, null, 16, null, 44_100L),
+            new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_AAC, null, 16, null, 44_100L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, null, 8, 64_000, 8_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, null, 16, 128_000, 8_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 88_200, 11_025L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 176_400, 11_025L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 96_000, 12_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 192_000, 12_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 128_000, 16_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 256_000, 16_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 176_400, 22_050L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 352_800, 22_050L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 192_000, 24_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 384_000, 24_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 256_000, 32_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 512_000, 32_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 352_800, 44_100L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 705_600, 44_100L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_UNSIGNED, false, 8, 384_000, 48_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_SIGNED, false, 16, 768_000, 48_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 64_000, 8_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 88_200, 11_025L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 176_400, 22_050L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ALAW, null, 8, 352_800, 44_100L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 64_000, 8_000L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 88_200, 11_025L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 176_400, 22_050L),
+            new AudioFormat(AudioFormat.CONTAINER_WAVE, AudioFormat.CODEC_PCM_ULAW, null, 8, 352_800, 44_100L));
 
     private static final Set<Locale> SUPPORTED_LOCALES = new HashSet<>();
     static {
@@ -164,7 +192,7 @@ public class VoiceRSSCloudImpl implements VoiceRSSCloudAPI {
     }
 
     @Override
-    public Set<String> getAvailableAudioFormats() {
+    public Set<AudioFormat> getAvailableAudioFormats() {
         return SUPPORTED_AUDIO_FORMATS;
     }
 
@@ -208,9 +236,9 @@ public Set<String> getAvailableVoices(Locale locale) {
      * dependencies.
      */
     @Override
-    public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioFormat)
-            throws IOException {
-        String url = createURL(apiKey, text, locale, voice, audioFormat);
+    public InputStream getTextToSpeech(String apiKey, String text, String locale, String voice, String audioCodec,
+            String audioFormat) throws IOException {
+        String url = createURL(apiKey, text, locale, voice, audioCodec, audioFormat);
         logger.debug("Call {}", url);
         URLConnection connection = new URL(url).openConnection();
 
@@ -254,13 +282,15 @@ public InputStream getTextToSpeech(String apiKey, String text, String locale, St
      *
      * It is in package scope to be accessed by tests.
      */
-    private String createURL(String apiKey, String text, String locale, String voice, String audioFormat) {
+    private String createURL(String apiKey, String text, String locale, String voice, String audioCodec,
+            String audioFormat) {
         String encodedMsg = URLEncoder.encode(text, StandardCharsets.UTF_8);
-        String url = "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioFormat;
+        String url = "http://api.voicerss.org/?key=" + apiKey + "&hl=" + locale + "&c=" + audioCodec + "&f="
+                + audioFormat;
         if (!DEFAULT_VOICE.equals(voice)) {
             url += "&v=" + voice;
         }
-        url += "&f=44khz_16bit_mono&src=" + encodedMsg;
+        url += "&src=" + encodedMsg;
         return url;
     }
 }
diff --git a/....openhab.voice.voicerss/src/main/java/org/openhab/voice/voicerss/tool/CreateTTSCache.java b/....openhab.voice.voicerss/src/main/java/org/openhab/voice/voicerss/tool/CreateTTSCache.java
@@ -106,7 +106,7 @@ private void generateCacheForMessage(String apiKey, String cacheDir, String loca
             return;
         }
         CachedVoiceRSSCloudImpl impl = new CachedVoiceRSSCloudImpl(cacheDir);
-        File cachedFile = impl.getTextToSpeechAsFile(apiKey, trimmedMsg, locale, voice, "MP3");
+        File cachedFile = impl.getTextToSpeechAsFile(apiKey, trimmedMsg, locale, voice, "MP3", null);
         System.out.println(
                 "Created cached audio for locale='" + locale + "', msg='" + trimmedMsg + "' to file=" + cachedFile);
     }

diff --git a/...cerss/src/test/java/org/openhab/voice/voicerss/internal/CompatibleAudioFormatMatcher.java b/...cerss/src/test/java/org/openhab/voice/voicerss/internal/CompatibleAudioFormatMatcher.java
@@ -0,0 +1,52 @@
+/**
+ * Copyright (c) 2010-2022 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.voicerss.internal;
+
+import org.hamcrest.Description;
+import org.hamcrest.Matcher;
+import org.hamcrest.TypeSafeMatcher;
+import org.openhab.core.audio.AudioFormat;
+
+/**
+ * Hamcrest {@link Matcher} that accepts an {@link AudioFormat} the expected format is compatible with.
+ *
+ * @author Andreas Brenk - Initial contribution
+ */
+public class CompatibleAudioFormatMatcher extends TypeSafeMatcher<AudioFormat> {
+
+    private final AudioFormat audioFormat;
+
+    public CompatibleAudioFormatMatcher(AudioFormat audioFormat) {
+        this.audioFormat = audioFormat;
+    }
+
+    @Override
+    protected boolean matchesSafely(AudioFormat actual) {
+        return audioFormat.isCompatible(actual);
+    }
+
+    @Override
+    public void describeTo(Description description) {
+        description.appendText("an audio format compatible to ").appendValue(audioFormat);
+    }
+
+    /**
+     * Creates a matcher that matches when the specified <code>audioFormat</code>
+     * reports the examined object as compatible (<code>audioFormat.isCompatible(actual)</code>).
+     *
+     * @param audioFormat the audio format which must be compatible
+     */
+    public static Matcher<AudioFormat> compatibleAudioFormat(AudioFormat audioFormat) {
+        return new CompatibleAudioFormatMatcher(audioFormat);
+    }
+}