OSDN Git Service

Updating TtsEngine.h and SynthProxy.cpp so that buffer memory
author Charles Chen <clchen@google.com>
Fri, 5 Jun 2009 20:58:33 +0000 (13:58 -0700)
committer Charles Chen <clchen@google.com>
Fri, 5 Jun 2009 20:58:33 +0000 (13:58 -0700)
management is handled on the system side.

include/tts/TtsEngine.h
tts/jni/android_tts_SynthProxy.cpp [changed mode: 0755->0644]

index bf62995..e0220ea 100644 (file)
 
 namespace android {
 
+enum tts_synth_status {             // synthesis status handed to the synthDoneCB_t callback
+    TTS_SYNTH_DONE              = 0, // synthesis is done
+    TTS_SYNTH_PENDING           = 1  // more data remains to be synthesized
+};
+
+enum tts_callback_status {          // status of the consumer, returned by the synthDoneCB_t callback
+    TTS_CALLBACK_HALT           = 0, // consumer asks to stop
+    TTS_CALLBACK_CONTINUE       = 1  // consumer asks to continue
+};
+
 // The callback is used by the implementation of this interface to notify its
 // client, the Android TTS service, that the last requested synthesis has been
-// completed.
+// completed. // TODO reword
 // The callback for synthesis completed takes:
-//    void *       - The userdata pointer set in the original synth call
-//    uint32_t     - Track sampling rate in Hz
-//    audio_format - The AudioSystem::audio_format enum
-//    int          - The number of channels
-//    int8_t *     - A buffer of audio data only valid during the execution of the callback
-//    size_t       - The size of the buffer
-// Note about memory management:
-//    The implementation of TtsEngine is responsible for the management of the memory
-//    it allocates to store the synthesized speech. After the execution of the callback
-//    to hand the synthesized data to the client of TtsEngine, the TTS engine is
-//    free to reuse or free the previously allocated memory.
-//    This implies that the implementation of the "synthDoneCB" callback cannot use
-//    the pointer to the buffer of audio samples outside of the callback itself.
-typedef void (synthDoneCB_t)(void *, uint32_t, AudioSystem::audio_format, int, int8_t *, size_t);
+//    [inout] void *&          - The userdata pointer set in the original synth call
+//    [in]    uint32_t         - Track sampling rate in Hz
+//    [in]    audio_format     - The AudioSystem::audio_format enum
+//    [in]    int              - The number of channels
+//    [inout] int8_t *&        - A buffer of audio data only valid during the execution of the callback
+//    [inout] size_t &         - The size of the buffer
+//    [in]    tts_synth_status - Status of the synthesis: TTS_SYNTH_DONE (0) for done, TTS_SYNTH_PENDING (1) for more data to be synthesized.
+// Returns the status of the consumer of the synthesis: TTS_CALLBACK_HALT (0) for stop, TTS_CALLBACK_CONTINUE (1) for continue.
+typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t, AudioSystem::audio_format, int, int8_t *&, size_t&, tts_synth_status);
 
 class TtsEngine;
 extern "C" TtsEngine* getTtsEngine();
@@ -155,13 +160,13 @@ public:
     // @param text      the UTF-8 text to synthesize
     // @param userdata  pointer to be returned when the callback is invoked
     // @return          TTS_SUCCESS or TTS_FAILURE
-    virtual tts_result synthesizeText(const char *text, void *userdata);
+    virtual tts_result synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, void *userdata);
 
     // Synthesize IPA text. When synthesis completes, the engine must call the given callback to notify the TTS API.
     // @param ipa      the IPA data to synthesize
     // @param userdata  pointer to be returned when the callback is invoked
     // @return TTS_FEATURE_UNSUPPORTED if IPA is not supported, otherwise TTS_SUCCESS or TTS_FAILURE
-    virtual tts_result synthesizeIpa(const char *ipa, void *userdata);
+    virtual tts_result synthesizeIpa(const char *ipa, int8_t *buffer, size_t bufferSize, void *userdata);
 };
 
 } // namespace android
old mode 100755 (executable)
new mode 100644 (file)
index d8f1bf3..582e621
@@ -32,6 +32,7 @@
 #define DEFAULT_TTS_RATE        16000
 #define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
 #define DEFAULT_TTS_NB_CHANNELS 1
+#define DEFAULT_TTS_BUFFERSIZE  1024
 
 #define USAGEMODE_PLAY_IMMEDIATELY 0
 #define USAGEMODE_WRITE_TO_FILE    1
@@ -64,6 +65,8 @@ class SynthProxyJniStorage {
         uint32_t                  mSampleRate;
         AudioSystem::audio_format mAudFormat;
         int                       mNbChannels;
+        int8_t *                  mBuffer;
+        size_t                    mBufferSize;
 
         SynthProxyJniStorage() {
             //tts_class = NULL;
@@ -73,6 +76,8 @@ class SynthProxyJniStorage {
             mSampleRate = DEFAULT_TTS_RATE;
             mAudFormat  = DEFAULT_TTS_FORMAT;
             mNbChannels = DEFAULT_TTS_NB_CHANNELS;
+            mBufferSize = DEFAULT_TTS_BUFFERSIZE;
+            mBuffer = new int8_t[mBufferSize];
         }
 
         ~SynthProxyJniStorage() {
@@ -81,6 +86,7 @@ class SynthProxyJniStorage {
                 mNativeSynthInterface->shutdown();
                 mNativeSynthInterface = NULL;
             }
+            delete[] mBuffer; // mBuffer comes from new int8_t[mBufferSize]; array form is required
         }
 
         void killAudio() {
@@ -159,23 +165,27 @@ void prepAudioTrack(SynthProxyJniStorage* pJniData,
  * Callback from TTS engine.
  * Directly speaks using AudioTrack or write to file
  */
-static void ttsSynthDoneCB(void * userdata, uint32_t rate,
+static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
                            AudioSystem::audio_format format, int channel,
-                           int8_t *wav, size_t bufferSize) {
+                           int8_t *&wav, size_t &bufferSize, tts_synth_status status) {
     LOGI("ttsSynthDoneCallback: %d bytes", bufferSize);
 
+    if (userdata == NULL){
+        LOGE("userdata == NULL");
+        return TTS_CALLBACK_HALT;
+    }
     afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
+    SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage);
 
     if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
         LOGI("Direct speech");
 
         if (wav == NULL) {
+            delete pForAfter;
             LOGI("Null: speech has completed");
         }
 
         if (bufferSize > 0) {
-            SynthProxyJniStorage* pJniData =
-                    (SynthProxyJniStorage*)(pForAfter->jniStorage);
             prepAudioTrack(pJniData, rate, format, channel);
             if (pJniData->mAudioOut) {
                 pJniData->mAudioOut->write(wav, bufferSize);
@@ -187,6 +197,7 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate,
     } else  if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
         LOGI("Save to file");
         if (wav == NULL) {
+            delete pForAfter;
             LOGI("Null: speech has completed");
         }
         if (bufferSize > 0){
@@ -195,10 +206,17 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate,
     }
     // TODO update to call back into the SynthProxy class through the
     //      javaTTSFields.synthProxyMethodPost method to notify
-    //      playback has completed
+    //      playback has completed if the synthesis is done, i.e.
+    //      if status == TTS_SYNTH_DONE
+    //delete pForAfter;
+
+    // we don't update the wav (output) parameter as we'll let the next callback
+    // write at the same location, we've consumed the data already, but we need
+    // to update bufferSize to let the TTS engine know how much it can write the
+    // next time it calls this function.
+    bufferSize = pJniData->mBufferSize;
 
-    delete pForAfter;
-    return;
+    return TTS_CALLBACK_CONTINUE;
 }
 
 
@@ -223,7 +241,9 @@ android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
     } else {
         TtsEngine *(*get_TtsEngine)() =
             reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));
+
         pJniStorage->mNativeSynthInterface = (*get_TtsEngine)();
+
         if (pJniStorage->mNativeSynthInterface) {
             pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB);
         }
@@ -323,7 +343,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
 
     // TODO check return codes
     if (pSynthData->mNativeSynthInterface) {
-        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
+        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
                 (void *)pForAfter);
     }
 
@@ -395,7 +415,7 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
 
     if (pSynthData->mNativeSynthInterface) {
         const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
-        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
+        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
                 (void *)pForAfter);
         env->ReleaseStringUTFChars(textJavaString, textNativeString);
     }
@@ -442,6 +462,7 @@ static void
 android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData,
         int bufferPointer, int bufferSize)
 {
+LOGI("android_tts_SynthProxy_playAudioBuffer");
     if (jniData == 0) {
         LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data");
         return;