Updating TtsEngine.h and SynthProxy.cpp so that buffer memory is allocated and owned by SynthProxy and passed to the TTS engine

author Charles Chen <clchen@google.com>

Fri, 5 Jun 2009 20:58:33 +0000 (13:58 -0700)

committer Charles Chen <clchen@google.com>

Fri, 5 Jun 2009 20:58:33 +0000 (13:58 -0700)
author Charles Chen <clchen@google.com>
Fri, 5 Jun 2009 20:58:33 +0000 (13:58 -0700)
committer Charles Chen <clchen@google.com>
Fri, 5 Jun 2009 20:58:33 +0000 (13:58 -0700)
diff --git a/include/tts/TtsEngine.h b/include/tts/TtsEngine.h

index bf62995..e0220ea 100644 (file)
--- a/include/tts/TtsEngine.h
+++ b/include/tts/TtsEngine.h
@@ -25,24 +25,29 @@
  
  namespace android {
  
+enum tts_synth_status {
+    TTS_SYNTH_DONE              = 0,
+    TTS_SYNTH_PENDING           = 1
+};
+
+enum tts_callback_status {
+    TTS_CALLBACK_HALT           = 0,
+    TTS_CALLBACK_CONTINUE       = 1
+};
+
  // The callback is used by the implementation of this interface to notify its
  // client, the Android TTS service, that the last requested synthesis has been
-// completed.
+// completed. // TODO reword
  // The callback for synthesis completed takes:
-//    void *       - The userdata pointer set in the original synth call
-//    uint32_t     - Track sampling rate in Hz
-//    audio_format - The AudioSystem::audio_format enum
-//    int          - The number of channels
-//    int8_t *     - A buffer of audio data only valid during the execution of the callback
-//    size_t       - The size of the buffer
-// Note about memory management:
-//    The implementation of TtsEngine is responsible for the management of the memory
-//    it allocates to store the synthesized speech. After the execution of the callback
-//    to hand the synthesized data to the client of TtsEngine, the TTS engine is
-//    free to reuse or free the previously allocated memory.
-//    This implies that the implementation of the "synthDoneCB" callback cannot use
-//    the pointer to the buffer of audio samples outside of the callback itself.
-typedef void (synthDoneCB_t)(void *, uint32_t, AudioSystem::audio_format, int, int8_t *, size_t);
+//    [inout] void *&      - The userdata pointer set in the original synth call
+//    [in]    uint32_t     - Track sampling rate in Hz
+//    [in]    audio_format - The AudioSystem::audio_format enum
+//    [in]    int          - The number of channels
+//    [inout] int8_t *&     - A buffer of audio data only valid during the execution of the callback
+//    [inout] size_t  &     - The size of the buffer
+//    [in]    tts_synth_status  - Status of the synthesis; 0 for done, 1 for more data to be synthesized.
+// Returns the status of the consumer of the synthesis. 0 for stop, 1 for continue.
+typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t, AudioSystem::audio_format, int, int8_t *&, size_t&, tts_synth_status);
  
  class TtsEngine;
  extern "C" TtsEngine* getTtsEngine();
@@ -155,13 +160,13 @@ public:
      // @param text      the UTF-8 text to synthesize
      // @param userdata  pointer to be returned when the call is invoked
      // @return          TTS_SUCCESS or TTS_FAILURE
-    virtual tts_result synthesizeText(const char *text, void *userdata);
+    virtual tts_result synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, void *userdata);
  
      // Synthesize IPA text. When synthesis completes, the engine must call the given callback to notify the TTS API.
      // @param ipa      the IPA data to synthesize
      // @param userdata  pointer to be returned when the call is invoked
      // @return TTS_FEATURE_UNSUPPORTED if IPA is not supported, otherwise TTS_SUCCESS or TTS_FAILURE
-    virtual tts_result synthesizeIpa(const char *ipa, void *userdata);
+    virtual tts_result synthesizeIpa(const char *ipa, int8_t *buffer, size_t bufferSize, void *userdata);
  };
  
  } // namespace android
diff --git a/tts/jni/android_tts_SynthProxy.cpp b/tts/jni/android_tts_SynthProxy.cpp

old mode 100755 (executable)

new mode 100644 (file)

index d8f1bf3..582e621
--- a/tts/jni/android_tts_SynthProxy.cpp
+++ b/tts/jni/android_tts_SynthProxy.cpp
@@ -32,6 +32,7 @@
  #define DEFAULT_TTS_RATE        16000
  #define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
  #define DEFAULT_TTS_NB_CHANNELS 1
+#define DEFAULT_TTS_BUFFERSIZE  1024
  
  #define USAGEMODE_PLAY_IMMEDIATELY 0
  #define USAGEMODE_WRITE_TO_FILE    1
@@ -64,6 +65,8 @@ class SynthProxyJniStorage {
          uint32_t                  mSampleRate;
          AudioSystem::audio_format mAudFormat;
          int                       mNbChannels;
+        int8_t *                  mBuffer;
+        size_t                    mBufferSize;
  
          SynthProxyJniStorage() {
              //tts_class = NULL;
@@ -73,6 +76,8 @@ class SynthProxyJniStorage {
              mSampleRate = DEFAULT_TTS_RATE;
              mAudFormat  = DEFAULT_TTS_FORMAT;
              mNbChannels = DEFAULT_TTS_NB_CHANNELS;
+            mBufferSize = DEFAULT_TTS_BUFFERSIZE;
+            mBuffer = new int8_t[mBufferSize];
          }
  
          ~SynthProxyJniStorage() {
@@ -81,6 +86,7 @@ class SynthProxyJniStorage {
                  mNativeSynthInterface->shutdown();
                  mNativeSynthInterface = NULL;
              }
+            delete mBuffer;
          }
  
          void killAudio() {
@@ -159,23 +165,27 @@ void prepAudioTrack(SynthProxyJniStorage* pJniData,
   * Callback from TTS engine.
   * Directly speaks using AudioTrack or write to file
   */
-static void ttsSynthDoneCB(void * userdata, uint32_t rate,
+static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
                             AudioSystem::audio_format format, int channel,
-                           int8_t *wav, size_t bufferSize) {
+                           int8_t *&wav, size_t &bufferSize, tts_synth_status status) {
      LOGI("ttsSynthDoneCallback: %d bytes", bufferSize);
  
+    if (userdata == NULL){
+        LOGE("userdata == NULL");
+        return TTS_CALLBACK_HALT;
+    }
      afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
+    SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage);
  
      if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
          LOGI("Direct speech");
  
          if (wav == NULL) {
+            delete pForAfter;
              LOGI("Null: speech has completed");
          }
  
          if (bufferSize > 0) {
-            SynthProxyJniStorage* pJniData =
-                    (SynthProxyJniStorage*)(pForAfter->jniStorage);
              prepAudioTrack(pJniData, rate, format, channel);
              if (pJniData->mAudioOut) {
                  pJniData->mAudioOut->write(wav, bufferSize);
@@ -187,6 +197,7 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate,
      } else  if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
          LOGI("Save to file");
          if (wav == NULL) {
+            delete pForAfter;
              LOGI("Null: speech has completed");
          }
          if (bufferSize > 0){
@@ -195,10 +206,17 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate,
      }
      // TODO update to call back into the SynthProxy class through the
      //      javaTTSFields.synthProxyMethodPost methode to notify
-    //      playback has completed
+    //      playback has completed if the synthesis is done, i.e.
+    //      if status == TTS_SYNTH_DONE
+    //delete pForAfter;
+
+    // we don't update the wav (output) parameter as we'll let the next callback
+    // write at the same location, we've consumed the data already, but we need
+    // to update bufferSize to let the TTS engine know how much it can write the
+    // next time it calls this function.
+    bufferSize = pJniData->mBufferSize;
  
-    delete pForAfter;
-    return;
+    return TTS_CALLBACK_CONTINUE;
  }
  
  
@@ -223,7 +241,9 @@ android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
      } else {
          TtsEngine *(*get_TtsEngine)() =
              reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));
+
          pJniStorage->mNativeSynthInterface = (*get_TtsEngine)();
+
          if (pJniStorage->mNativeSynthInterface) {
              pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB);
          }
@@ -323,7 +343,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
  
      // TODO check return codes
      if (pSynthData->mNativeSynthInterface) {
-        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
+        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
                  (void *)pForAfter);
      }
  
@@ -395,7 +415,7 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
  
      if (pSynthData->mNativeSynthInterface) {
          const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
-        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
+        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
                  (void *)pForAfter);
          env->ReleaseStringUTFChars(textJavaString, textNativeString);
      }
@@ -442,6 +462,7 @@ static void
  android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData,
          int bufferPointer, int bufferSize)
  {
+LOGI("android_tts_SynthProxy_playAudioBuffer");
      if (jniData == 0) {
          LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data");
          return;
author	Charles Chen <clchen@google.com>
	Fri, 5 Jun 2009 20:58:33 +0000 (13:58 -0700)
committer	Charles Chen <clchen@google.com>
	Fri, 5 Jun 2009 20:58:33 +0000 (13:58 -0700)
include/tts/TtsEngine.h		patch \| blob \| history
tts/jni/android_tts_SynthProxy.cpp	[changed mode: 0755->0644]	patch \| blob \| history