source/synthDrivers/jtalk/jtalkDriver.py

   1 # -*- coding: utf-8 -*-
   2 #A part of NonVisual Desktop Access (NVDA)
   3 # speech engine nvdajp_jtalk
   4 # Copyright (C) 2010-2014 Takuya Nishimoto (nishimotz.com)
   5
   6 from logHandler import log
   7 import time
   8 import Queue
   9 import os
  10 import codecs
  11 import re
  12 import string
  13 import ctypes
  14 import baseObject
  15 import copy
  16 import nvwave
  17 from .. import _espeak
  18 from jtalkCore import *
  19 import jtalkPrepare
  20 from ..jtalk._nvdajp_unicode import unicode_normalize
  21 from ..jtalk import _bgthread
  22 import time
  23 import watchdog
  24 import config
  25 from jtalkDir import jtalk_dir, dic_dir, user_dics
  26
  27 DEBUG = False
  28
  29 RATE_BOOST_MULTIPLIER = 1.5
  30
  31 # math.log(150) = 5.0, math.log(350) = 5.86
  32 _jtalk_voices = [
  33         {"id": "V1",
  34          "name": "m001",
  35          "lang":"ja",
  36          "samp_rate": 48000,
  37          "fperiod": 240,
  38          "lf0_base": 5.0,
  39          "pitch_bias": 0,
  40          "speaker_attenuation": 1.0,
  41          "htsvoice": os.path.join(jtalk_dir, 'm001', 'm001.htsvoice'),
  42          "espeak_variant": "max"},
  43         {"id": "V2",
  44          "name": "mei",
  45          "lang":"ja",
  46          "samp_rate": 48000,
  47          "fperiod": 240,
  48          "lf0_base": 5.86,
  49          "pitch_bias": -10,
  50          "speaker_attenuation": 0.5,
  51          "htsvoice": os.path.join(jtalk_dir, 'mei', 'mei_normal.htsvoice'),
  52          "espeak_variant": "f1"},
  53         {"id": "V3",
  54          "name": "lite",
  55          "lang":"ja",
  56          "samp_rate": 16000,
  57          "fperiod": 80,
  58          "lf0_base": 5.0,
  59          "pitch_bias": 0,
  60          "speaker_attenuation": 1.0,
  61          "htsvoice": os.path.join(jtalk_dir, 'lite', 'voice.htsvoice'),
  62          "espeak_variant": "max"},
  63 ]
  64 default_jtalk_voice = _jtalk_voices[1] # V2
  65 voice_args = None
  66
class VoiceProperty(baseObject.AutoPropertyObject):
        """Voice property holder derived from baseObject.AutoPropertyObject.

        No attributes are declared here.  Presumably instances of this class are
        what callers pass to speak() as ``voiceProperty_`` (TODO confirm); the
        attributes this module reads from that argument are ``characterMode``,
        ``pitch`` and ``inflection``.
        """
        def __init__(self):
                # Nothing beyond the base class initialisation is performed.
                super(VoiceProperty,self).__init__()
  70
  71 # if samp_rate==16000: normal speed = 80samples period
  72 fperiod = 240
  73
  74 # gain control
  75 max_level = 32000
  76 thres_level = 128
  77 thres2_level = 128
  78 speaker_attenuation = 1.0
  79
  80 logwrite = log.debug
  81 lastIndex = None
  82 currIndex = None
  83 lastIndex = None
  84 player = None
  85 currentEngine = 0 # 1:espeak 2:jtalk
  86
def isSpeaking():
        """Return the ``isSpeaking`` flag stored on the ``_bgthread`` module."""
        return _bgthread.isSpeaking
  89
def setSpeaking(b):
        """Store ``b`` in the ``isSpeaking`` flag of the ``_bgthread`` module.

        _jtalk_speak() sets the flag before synthesizing and polls it through
        isSpeaking(); stop() clears it to make synthesis give up.
        """
        _bgthread.isSpeaking = b
  92
  93 def _jtalk_speak(msg, index=None, prop=None):
  94         global currIndex, buff
  95         global currentEngine
  96         global lastIndex
  97         if prop is None: return
  98         currIndex = index
  99         if prop.characterMode:
 100                 fperiod_current = voice_args['fperiod']
 101         else:
 102                 fperiod_current = fperiod
 103         msg = unicode_normalize(msg)
 104         msg = jtalkPrepare.convert(msg)
 105         lw = None
 106         if DEBUG: lw = logwrite
 107         setSpeaking(True)
 108         currentEngine = 2
 109         if DEBUG: logwrite("p:%d i:%d msg:%s" % (prop.pitch, prop.inflection, msg))
 110         level = int(max_level * speaker_attenuation)
 111         la = 0.020 * prop.inflection # 50 = original range
 112         ls = 0.015 * (prop.pitch - 50.0 + voice_args['pitch_bias']) # 50 = no shift
 113         lo = ls + voice_args['lf0_base'] * (1 - la)
 114         if DEBUG: logwrite("lo:%f la:%f" % (lo, la))
 115         for t in string.split(msg):
 116                 if DEBUG: logwrite("unicode (%s)" % t)
 117                 s = text2mecab(t)
 118                 if DEBUG: logwrite("utf-8 (%s)" % s.decode('utf-8', 'ignore'))
 119                 if not isSpeaking(): libjt_refresh(); return
 120                 mf = MecabFeatures()
 121                 Mecab_analysis(s, mf, logwrite_=logwrite)
 122                 if DEBUG: Mecab_print(mf, logwrite)
 123                 Mecab_correctFeatures(mf)
 124                 if DEBUG: Mecab_print(mf, logwrite)
 125                 ar = Mecab_splitFeatures(mf, CODE_='utf-8')
 126                 for a in ar:
 127                         if isSpeaking():
 128                                 if DEBUG: Mecab_print(a, logwrite, CODE_='utf-8')
 129                                 Mecab_utf8_to_cp932(a)
 130                                 if DEBUG: logwrite("Mecab_analysis done")
 131                                 libjt_synthesis(
 132                                         a.feature,
 133                                         a.size,
 134                                         fperiod_ = fperiod_current,
 135                                         feed_func_ = player.feed, # player.feed() is called inside
 136                                         is_speaking_func_ = isSpeaking,
 137                                         begin_thres_ = thres_level,
 138                                         end_thres_ = thres2_level,
 139                                         level_ = level,
 140                                         logwrite_ = lw,
 141                                         lf0_offset_ = lo,
 142                                         lf0_amp_ = la)
 143                                 libjt_refresh()
 144                                 if DEBUG: logwrite("libjt_synthesis done")
 145                         del a
 146                 del mf
 147         player.sync()
 148         lastIndex = currIndex
 149         currIndex = None
 150         setSpeaking(False)
 151         currentEngine = 0
 152
 153 espeakMark = 10000
 154
 155 def _espeak_speak(msg, lang, index=None, prop=None):
 156         global currentEngine, lastIndex, espeakMark
 157         currentEngine = 1
 158         msg = unicode(msg)
 159         msg.translate({ord(u'\01'):None,ord(u'<'):u'&lt;',ord(u'>'):u'&gt;'})
 160         msg = u"<voice xml:lang=\"%s\">%s</voice>" % (lang, msg)
 161         msg += u"<mark name=\"%d\" />" % espeakMark
 162         _espeak.speak(msg)
 163         while currentEngine == 1 and _espeak.lastIndex != espeakMark:
 164                 time.sleep(0.1)
 165                 watchdog.alive()
 166         time.sleep(0.4)
 167         watchdog.alive()
 168         lastIndex = index
 169         currentEngine = 0
 170         espeakMark += 1
 171
 172 # call from BgThread
 173 def _speak(arg):
 174         msg, lang, index, prop = arg
 175         if DEBUG: logwrite('[' + lang + ']' + msg)
 176         if DEBUG: logwrite("_speak(%s)" % msg)
 177         if lang == 'ja':
 178                 _jtalk_speak(msg, index, prop)
 179         else:
 180                 _espeak_speak(msg, lang, index, prop)
 181
 182 # call from BgThread
 183 def _updateSpeakIndex(index):
 184         global currIndex
 185         global lastIndex
 186         lastIndex = currIndex = index
 187
 188 def speak(msg, lang, index=None, voiceProperty_=None):
 189         if msg is None and lang is None:
 190                 _bgthread.execWhenDone(_updateSpeakIndex, index, mustBeAsync=True)
 191                 return
 192         msg = msg.strip()
 193         if len(msg) == 0: return
 194         if voiceProperty_ is None: return
 195         arg = [msg, lang, index, copy.deepcopy(voiceProperty_)]
 196         _bgthread.execWhenDone(_speak, arg, mustBeAsync=True)
 197
 198 def stop():
 199         global currentEngine
 200         if currentEngine == 1:
 201                 _espeak.stop()
 202                 currentEngine = 0
 203                 return
 204         # Kill all speech from now.
 205         # We still want parameter changes to occur, so requeue them.
 206         params = []
 207         stop_task_count = 0 # for log.info()
 208         try:
 209                 while True:
 210                         item = _bgthread.bgQueue.get_nowait() # [func, args, kwargs]
 211                         if item[0] != _speak:
 212                                 params.append(item)
 213                         else:
 214                                 stop_task_count = stop_task_count + 1
 215                         _bgthread.bgQueue.task_done()
 216         except Queue.Empty:
 217                 # Let the exception break us out of this loop, as queue.empty() is not reliable anyway.
 218                 pass
 219         for item in params:
 220                 _bgthread.bgQueue.put(item)
 221         setSpeaking(False)
 222         if DEBUG: logwrite("stop: %d task(s) stopping" % stop_task_count)
 223         player.stop()
 224         lastIndex = None
 225
 226 def pause(switch):
 227         if currentEngine == 1:
 228                 _espeak.pause(switch)
 229         elif currentEngine == 2:
 230                 player.pause(switch)
 231
 232 def initialize(voice = default_jtalk_voice):
 233         global player, voice_args
 234         global speaker_attenuation
 235         voice_args = voice
 236         speaker_attenuation = voice_args['speaker_attenuation']
 237         if not _espeak.espeakDLL:
 238                 _espeak.initialize()
 239                 log.debug("jtalk using eSpeak version %s" % _espeak.info())
 240         _espeak.setVoiceByLanguage("en")
 241         _espeak.setVoiceAndVariant(variant=voice["espeak_variant"])
 242         if not player:
 243                 player = nvwave.WavePlayer(channels=1, samplesPerSec=voice_args['samp_rate'], bitsPerSample=16, outputDevice=config.conf["speech"]["outputDevice"])
 244         if not _bgthread.bgThread:
 245                 _bgthread.initialize()
 246         if not mecab:
 247                 Mecab_initialize(log.info, jtalk_dir, dic_dir, user_dics)
 248         jtalkPrepare.setup()
 249
 250         jt_dll = os.path.join(jtalk_dir, 'libopenjtalk.dll')
 251         log.debug('jt_dll %s' % jt_dll)
 252         libjt_initialize(jt_dll)
 253         log.debug(libjt_version())
 254
 255         if os.path.isfile(voice_args['htsvoice']):
 256                 libjt_load(voice_args['htsvoice'])
 257                 log.info("loaded " + voice_args['htsvoice'])
 258         else:
 259                 log.error("load error " + voice_args['htsvoice'])
 260
def terminate():
        """Shut the driver down.

        Stops current speech, terminates the background thread, closes and
        forgets the wave player, then terminates eSpeak.  ``player`` must have
        been created (see initialize()) because both stop() and this function
        call methods on it.
        """
        global player
        stop()
        _bgthread.terminate()
        player.close()
        # Forget the player so that initialize() creates a new one.
        player = None
        _espeak.terminate()
 268
# Rate value most recently passed to set_rate(); 50 until it is called.
rate_percent = 50

def get_rate(rateBoost):
        """Return the rate value most recently given to set_rate().

        rateBoost: accepted but not used.
        """
        return rate_percent
 273
 274 def set_rate(rate, rateBoost):
 275         global fperiod, rate_percent
 276         rate_percent = rate
 277         if voice_args['samp_rate'] == 16000:
 278                 fperiod = int(80 - int(rate) / 2) # 80..30
 279         if voice_args['samp_rate'] == 48000:
 280                 fperiod = int(240 - 1.5 * int(rate)) # 240..90
 281         if not rateBoost:
 282                 fperiod = int(fperiod * RATE_BOOST_MULTIPLIER)
 283
 284 def set_volume(vol):
 285         global max_level, thres_level, thres2_level
 286         max_level = int(326.67 * int(vol) + 100) # 100..32767
 287         thres_level = 128
 288         thres2_level = 128
 289