Integrate latest SVOX code drop.

commit: 54823983fdcb4ee88d9a6c4e680ff2064dd95929 [log] [tgz]
author: Jean-Michel Trivi <jmtrivi@google.com> Fri Jun 19 14:11:46 2009 -0700
committer: Jean-Michel Trivi <jmtrivi@google.com> Fri Jun 19 14:31:00 2009 -0700
tree: b83388c5f0d4c4e086745eb981b497e9f098e5d8
parent: 0a6cc22595ba732fe3fd887c5b20ba22d1c730f7 [diff]
diff --git a/tts/com_svox_picottsengine.cpp b/tts/com_svox_picottsengine.cpp
index 85d149b..2fec253 100644
--- a/tts/com_svox_picottsengine.cpp
+++ b/tts/com_svox_picottsengine.cpp

@@ -35,14 +35,18 @@
 /* adaptation layer defines */
 #define PICO_MEM_SIZE       2500000
 #define PICO_MIN_RATE       20
+/* speaking rate    */
 #define PICO_DEF_RATE       100
 #define PICO_MAX_RATE       500
 #define PICO_MIN_PITCH      50
+/* speaking pitch   */
 #define PICO_DEF_PITCH      100
 #define PICO_MAX_PITCH      200
 #define PICO_MIN_VOLUME     0
+/* speaking volume  */
 #define PICO_DEF_VOLUME     250
 #define PICO_MAX_VOLUME     500
+/* string constants */
 #define MAX_OUTBUF_SIZE     128
 const char* PICO_LINGWARE_PATH              = "/sdcard/svox/";
 const char* PICO_VOICE_NAME                 = "PicoVoice";
@@ -52,10 +56,11 @@
 const char* PICO_PITCH_CLOSE_TAG            = "</pitch>";
 const char* PICO_VOLUME_OPEN_TAG            = "<volume level='%d'>";
 const char* PICO_VOLUME_CLOSE_TAG           = "</volume>";
+const char* PICO_PHONEME_OPEN_TAG           = "<phoneme ph=\"%s\">";
 
+/* supported voices     */
 const char* picoSupportedLangIso3[]         = { "eng",               "eng",               "deu",               "spa",               "fra",               "ita" };
 const char* picoSupportedCountryIso3[]      = { "USA",               "GB",               "DEU",               "ESP",               "FRA",               "ITA" };
-
 const char* picoSupportedLang[]             = { "en-rUS",           "en-rGB",           "de-rDE",           "es-rES",           "fr-rFR",           "it-rIT" };
 const char* picoInternalLang[]              = { "en-US",            "en-GB",            "de-DE",            "es-ES",            "fr-FR",            "it-IT" };
 const char* picoInternalTaLingware[]        = { "en-US_ta.bin",     "en-GB_ta.bin",     "de-DE_ta.bin",     "es-ES_ta.bin",     "fr-FR_ta.bin",     "it-IT_ta.bin" };
@@ -63,41 +68,45 @@
 const char* picoInternalUtppLingware[]      = { "en-US_utpp.bin",   "en-GB_utpp.bin",   "de-DE_utpp.bin",   "es-ES_utpp.bin",   "fr-FR_utpp.bin",   "it-IT_utpp.bin" };
 const int picoNumSupportedLang              = 6;
 
+/* supported properties */
 const char* picoSupportedProperties[]       = { "language", "rate", "pitch", "volume" };
 const int picoNumSupportedProperties        = 4;
 
-/* adapation layer globals */
+
+/* adaptation layer global variables */
 synthDoneCB_t* picoSynthDoneCBPtr;
 void* picoMemArea = NULL;
-pico_System picoSystem = NULL;
-pico_Resource picoTaResource = NULL;
-pico_Resource picoSgResource = NULL;
-pico_Resource picoUtppResource = NULL;
-pico_Engine picoEngine = NULL;
+pico_System     picoSystem = NULL;
+pico_Resource   picoTaResource = NULL;
+pico_Resource   picoSgResource = NULL;
+pico_Resource   picoUtppResource = NULL;
+pico_Engine     picoEngine = NULL;
 pico_Char* picoTaFileName = NULL;
 pico_Char* picoSgFileName = NULL;
 pico_Char* picoUtppFileName = NULL;
 pico_Char* picoTaResourceName = NULL;
 pico_Char* picoSgResourceName = NULL;
 pico_Char* picoUtppResourceName = NULL;
-int picoSynthAbort = 0;
-char* picoProp_currLang = NULL;
-int picoProp_currRate = PICO_DEF_RATE;
-int picoProp_currPitch = PICO_DEF_PITCH;
-int picoProp_currVolume = PICO_DEF_VOLUME;
+int     picoSynthAbort = 0;
+char*   picoProp_currLang   = NULL;                 /* current language */
+int     picoProp_currRate   = PICO_DEF_RATE;        /* current rate     */
+int     picoProp_currPitch  = PICO_DEF_PITCH;       /* current pitch    */
+int     picoProp_currVolume = PICO_DEF_VOLUME;      /* current volume   */
+
 
 
 /* internal helper functions */
 
 /** checkForLanguage
- *  Checks if the requested language is among the supported languages
+ *  Check if the requested language is among the supported languages.
  *  @language -  the language to check, either in xx or xx-rYY format
- *  return index of the language, or -1 if not supported
+ *  return index of the language, or -1 if not supported.
 */
-static int checkForLanguage(const char* language)
+static int checkForLanguage( const char * language )
 {
-    // verify that it's a language we support
-    int found = -1;
+     int found = -1;                                         /* language not found   */
+
+    /* Verify that the requested locale is a locale that we support.    */
     for (int i = 0; i < picoNumSupportedLang; i++)
     {
         if (strcmp(language, picoSupportedLang[i]) == 0)
@@ -105,10 +114,11 @@
             found = i;
             break;
         }
-    }
+    };
     if (found < 0)
     {
-        // didn't find an exact match, may have been specified with only the first 2 characters
+        /* We didn't find an exact match; it may have been specified with only the first 2 characters.
+           This could overmatch ISO 639-3 language codes.                                   */
         for (int i = 0; i < picoNumSupportedLang; i++)
         {
             if (strncmp(language, picoSupportedLang[i], 2) == 0)
@@ -121,42 +131,44 @@
         {
             LOGE("TtsEngine::set language called with unsupported language");
         }
-    }
+    };
     return found;
 }
 
+
 /** cleanResources
- *  Unloads any loaded pico resources
+ *  Unloads any loaded Pico resources.
 */
-static void cleanResources()
+static void cleanResources( void )
 {
     if (picoEngine)
     {
-        pico_disposeEngine(picoSystem, &picoEngine);
+        pico_disposeEngine( picoSystem, &picoEngine );
         pico_releaseVoiceDefinition(picoSystem, (pico_Char*)PICO_VOICE_NAME);
         picoEngine = NULL;
     }
     if (picoUtppResource)
     {
-        pico_unloadResource(picoSystem, &picoUtppResource);
+        pico_unloadResource( picoSystem, &picoUtppResource );
         picoUtppResource = NULL;
     }
     if (picoTaResource)
     {
-        pico_unloadResource(picoSystem, &picoTaResource);
+        pico_unloadResource( picoSystem, &picoTaResource );
         picoTaResource = NULL;
     }
     if (picoSgResource)
     {
-        pico_unloadResource(picoSystem, &picoSgResource);
+        pico_unloadResource( picoSystem, &picoSgResource );
         picoSgResource = NULL;
     }
 }
 
+
 /** cleanFiles
- *  Frees any memory allocated for file and resource strings
+ *  Frees any memory allocated for file and resource strings.
 */
-static void cleanFiles()
+static void cleanFiles( void )
 {
     if (picoProp_currLang)
     {
@@ -383,8 +395,8 @@
 }
 
 /** doAddProperties
- *  add <speed>, <pitch> and <volume> tags to text if properties have been set to non-default values
- *  and returns the new string. Calling function is responsible for freeing returned string
+ *  Add <speed>, <pitch> and <volume> tags to text, if the properties have been set to non-default values,
+ *  and return the new string.  The calling function is responsible for freeing the returned string.
  *  @str - text to apply tags to
  *  return new string with tags applied
 */
@@ -394,19 +406,19 @@
     int haspitch = 0, hasspeed = 0, hasvol = 0;
     int textlen = strlen(str) + 1;
 
-    if (picoProp_currPitch != PICO_DEF_PITCH)
+    if (picoProp_currPitch != PICO_DEF_PITCH)           /* non-default pitch    */
     {
         textlen += strlen(PICO_PITCH_OPEN_TAG) + 5;
         textlen += strlen(PICO_PITCH_CLOSE_TAG);
         haspitch = 1;
     }
-    if (picoProp_currRate != PICO_DEF_RATE)
+    if (picoProp_currRate != PICO_DEF_RATE)             /* non-default rate     */
     {
         textlen += strlen(PICO_SPEED_OPEN_TAG) + 5;
         textlen += strlen(PICO_SPEED_CLOSE_TAG);
         hasspeed = 1;
     }
-    if (picoProp_currVolume != PICO_DEF_VOLUME)
+    if (picoProp_currVolume != PICO_DEF_VOLUME)         /* non-default volume   */
     {
         textlen += strlen(PICO_VOLUME_OPEN_TAG) + 5;
         textlen += strlen(PICO_VOLUME_CLOSE_TAG);
@@ -418,7 +430,7 @@
     {
         return NULL;
     }
-    memset(data, 0, textlen);
+    memset(data, 0, textlen);                           /* clear it             */
     if (haspitch)
     {
         char* tmp = (char*)malloc(strlen(PICO_PITCH_OPEN_TAG) + strlen(PICO_PITCH_CLOSE_TAG) + 5);
@@ -463,14 +475,15 @@
     return data;
 }
 
+
 /* API function implementations */
 
 /** init
- *  allocates pico memory block and initializes pico system
+ *  Allocates Pico memory block and initializes Pico system.
  *  synthDoneCBPtr - Pointer to callback function which will receive generated samples
  *  return tts_result
 */
-tts_result TtsEngine::init(synthDoneCB_t synthDoneCBPtr)
+tts_result TtsEngine::init( synthDoneCB_t synthDoneCBPtr )
 {
     if (synthDoneCBPtr == NULL)
     {
@@ -498,11 +511,12 @@
     return TTS_SUCCESS;
 }
 
+
 /** shutdown
- *  unloads all pico resources, terminates pico system and frees pico memory block
+ *  Unloads all Pico resources; terminates Pico system and frees Pico memory block.
  *  return tts_result
 */
-tts_result TtsEngine::shutdown()
+tts_result TtsEngine::shutdown( void )
 {
     cleanResources();
 
@@ -523,12 +537,12 @@
 }
 
 /** loadLanguage
- *  Load a new language
+ *  Load a new language.
  *  @value - language string in xx or xx-rYY format (i.e. "en" or "en-rUS")
  *  @size - size of value
  *  return tts_result
 */
-tts_result TtsEngine::loadLanguage(const char *value, const size_t size)
+tts_result TtsEngine::loadLanguage( const char * value, const size_t size )
 {
     return setProperty("language", value, size);
 }
@@ -597,26 +611,27 @@
 
 
 /** getLanguage
- *  Get currently loaded language - if any
+ *  Get the currently loaded language - if any.
  *  @value - buffer which will receive value
  *  @iosize - size of value - if value is too small to contain the return string, this will contain the actual size needed
  *  return tts_result
 */
-tts_result TtsEngine::getLanguage(char *value, size_t *iosize)
+tts_result TtsEngine::getLanguage( char * value, size_t * iosize )
 {
     return getProperty("language", value, iosize);
 }
 
+
 /** setProperty
- *  set property, supported properties are language, rate, pitch and volume
+ *  Set property. The supported properties are:  language, rate, pitch and volume.
  *  @property - name of property to set
  *  @value - value to set
  *  @size - size of value
  *  return tts_result
 */
-tts_result TtsEngine::setProperty(const char *property, const char *value, const size_t size)
+tts_result TtsEngine::setProperty( const char * property, const char * value, const size_t size )
 {
-    // sanity check
+    /* Sanity check */
     if (property == NULL)
     {
         LOGE("setProperty called with property NULL");
@@ -677,8 +692,9 @@
     return TTS_PROPERTY_UNSUPPORTED;
 }
 
+
 /** getProperty
- *  get property, supported properties are language, rate, pitch and volume
+ *  Get the property.  Supported properties are:  language, rate, pitch and volume.
  *  @property - name of property to get
  *  @value - buffer which will receive value of property
  *  @iosize - size of value - if size is too small on return this will contain actual size needed
@@ -686,7 +702,7 @@
 */
 tts_result TtsEngine::getProperty(const char *property, char *value, size_t* iosize)
 {
-    // sanity check
+    /* sanity check */
     if (property == NULL)
     {
         LOGE("getProperty called with property NULL");
@@ -759,8 +775,9 @@
     }
 }
 
+
 /** synthesizeText
- *  synthesizes a text string
+ *  Synthesizes a text string.
  *  @text - text to synthesize
  *  @buffer - buffer which will receive generated samples
  *  @bufferSize - size of buffer
@@ -788,7 +805,7 @@
         return TTS_FAILURE;
     }
 
-    // add property tags to string - if any
+    /* Add property tags to the string - if any.    */
     local_text = (pico_Char*)doAddProperties(text);
     if (!local_text)
     {
@@ -802,7 +819,7 @@
 
     size_t bufused = 0;
 
-    // synthesis loop
+    /* synthesis loop   */
     while (text_remaining)
     {
         if (picoSynthAbort)
@@ -811,7 +828,7 @@
             break;
         }
 
-        // feed text into engine
+        /* Feed the text into the engine.   */
         ret = pico_putTextUtf8(picoEngine, inp, text_remaining, &bytes_sent);
         if (ret != PICO_OK)
         {
@@ -828,18 +845,18 @@
             {
                 break;
             }
-            // retrieve samples and add to buffer
+            /* Retrieve the samples and add them to the buffer. */
             ret = pico_getData(picoEngine, (void*)outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type);
             if (bytes_recv)
             {
-                if (bufused + bytes_recv <= bufferSize)
+                if ((bufused + bytes_recv) <= bufferSize)
                 {
                     memcpy(buffer+bufused, (int8_t*)outbuf, bytes_recv);
                     bufused += bytes_recv;
                 }
                 else
                 {
-                    // buffer filled, pass on to callback function
+                    /* The buffer filled; pass this on to the callback function.    */
                     int cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING);
                     if (cbret == TTS_CALLBACK_HALT)
                     {
@@ -854,10 +871,11 @@
             }
         } while (PICO_STEP_BUSY == ret);
 
-        // synthesis is finished, notify caller and pass remaining samples
+        /* The synthesis is finished; notify the caller and pass the remaining samples.
+           Use 16 kHz, 16-bit samples.                                              */
         if (!picoSynthAbort)
         {
-            picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE);
+            picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE);
         }
         picoSynthAbort = 0;
 
@@ -873,22 +891,130 @@
     return TTS_SUCCESS;
 }
 
+
 /** synthesizeIpa
- *  synthesizes a phonetic string in IPA format
+ *  Synthesizes a phonetic string in IPA format.
  *  @ipa - phonetic string to synthesize
  *  @buffer - buffer which will receive generated samples
  *  @bufferSize - size of buffer
  *  @userdata - pointer to user data which will be passed back to callback function
  *  return tts_result
 */
-tts_result TtsEngine::synthesizeIpa(const char * /*ipa*/, int8_t * /*buffer*/, size_t /*bufferSize*/, void * /*userdata*/)
+tts_result TtsEngine::synthesizeIpa( const char * ipa, int8_t * buffer, size_t bufferSize, void * userdata )
 {
-    LOGI("synthIPA not supported in this release");
-    return TTS_FEATURE_UNSUPPORTED;
+    pico_Char*  inp = NULL;
+    pico_Char*  local_text = NULL;
+    short       outbuf[MAX_OUTBUF_SIZE/2];
+    pico_Int16  bytes_sent, bytes_recv, text_remaining, out_data_type;
+    pico_Status ret;
+
+    picoSynthAbort = 0;
+    if (ipa == NULL)
+    {
+        LOGE("synthesizeIpa called with NULL string");
+        return TTS_FAILURE;
+    }
+
+    if (buffer == NULL)
+    {
+        LOGE("synthesizeIpa called with NULL buffer");
+        return TTS_FAILURE;
+    }
+
+    /* Append phoneme tag. %%%
+       <phoneme ph="xxx"/>  */
+
+    /* Add property tags to the string - if any.    */
+    local_text = (pico_Char*)doAddProperties( ipa );
+    if (!local_text)
+    {
+        LOGE("Failed to allocate memory for text string");
+        return TTS_FAILURE;
+    }
+
+    text_remaining = strlen((const char*)local_text) + 1;
+
+    inp = (pico_Char*)local_text;
+
+    size_t bufused = 0;
+
+    /* synthesis loop   */
+    while (text_remaining)
+    {
+        if (picoSynthAbort)
+        {
+            ret = pico_resetEngine( picoEngine );
+            break;
+        }
+
+        /* Feed the text into the engine.   */
+        ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent );
+        if (ret != PICO_OK)
+        {
+            LOGE("Error synthesizing string '%s': [%d]", ipa, ret);
+            if (local_text) free(local_text);
+            return TTS_FAILURE;
+        }
+
+        /* Process the remaining string.    */
+        text_remaining -= bytes_sent;
+        inp += bytes_sent;
+        do
+        {
+            if (picoSynthAbort)
+            {
+                break;
+            }
+            /* Retrieve the samples and add them to the buffer. */
+            ret = pico_getData( picoEngine, (void*)outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type );
+            if (bytes_recv)
+            {
+                if ((bufused + bytes_recv) <= bufferSize)
+                {
+                    memcpy(buffer+bufused, (int8_t*)outbuf, bytes_recv);
+                    bufused += bytes_recv;
+                }
+                else
+                {
+                    /* The buffer filled; pass this on to the callback function.    */
+                    int cbret = picoSynthDoneCBPtr(userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_PENDING);
+                    if (cbret == TTS_CALLBACK_HALT)
+                    {
+                        LOGI("Halt requested by caller. Halting.");
+                        picoSynthAbort = 1;
+                        break;
+                    }
+                    bufused = 0;
+                    memcpy(buffer, (int8_t*)outbuf, bytes_recv);
+                    bufused += bytes_recv;
+                }
+            }
+        } while (PICO_STEP_BUSY == ret);
+
+        /* The synthesis is finished; notify the caller and pass the remaining samples.
+           Use 16 kHz, 16-bit samples.                                              */
+        if (!picoSynthAbort)
+        {
+            picoSynthDoneCBPtr( userdata, 16000, AudioSystem::PCM_16_BIT, 1, buffer, bufused, TTS_SYNTH_DONE );
+        }
+        picoSynthAbort = 0;                 /* succeeded    */
+
+        if (ret != PICO_STEP_IDLE)
+        {
+            LOGE("Error occurred during synthesis [%d]", ret);
+            if (local_text) free(local_text);
+            return TTS_FAILURE;
+        }
+    }
+
+    if (local_text)
+        free(local_text);
+    return TTS_SUCCESS;             /* succeeded    */
 }
 
+
 /** stop
- *  aborts running synthesis
+ *  Aborts the running synthesis.
  *  return tts_result
 */
 tts_result TtsEngine::stop()
@@ -897,6 +1023,7 @@
     return TTS_SUCCESS;
 }
 
+
 #ifdef __cplusplus
 extern "C" {
 #endif
commit	54823983fdcb4ee88d9a6c4e680ff2064dd95929	[log] [tgz]
author	Jean-Michel Trivi <jmtrivi@google.com>	Fri Jun 19 14:11:46 2009 -0700
committer	Jean-Michel Trivi <jmtrivi@google.com>	Fri Jun 19 14:31:00 2009 -0700
tree	b83388c5f0d4c4e086745eb981b497e9f098e5d8
parent	0a6cc22595ba732fe3fd887c5b20ba22d1c730f7 [diff]