OSDN Git Service

ti34735 in progress
[nvdajp/nvdajpmiscdep.git] / source / synthDrivers / jtalk / jtalkDriver.py
1 # -*- coding: utf-8 -*-
2 #A part of NonVisual Desktop Access (NVDA)
3 # speech engine nvdajp_jtalk
4 # Copyright (C) 2010-2014 Takuya Nishimoto (nishimotz.com)
5
6 from logHandler import log
7 import time
8 import Queue
9 import os
10 import codecs
11 import re
12 import string
13 import ctypes
14 import baseObject
15 import copy
16 import nvwave
17 from .. import _espeak
18 from jtalkCore import *
19 import jtalkPrepare 
20 from ..jtalk._nvdajp_unicode import unicode_normalize
21 from ..jtalk import _bgthread
22 import time
23 import watchdog
24 import config
25 from jtalkDir import jtalk_dir, dic_dir, user_dics
26
# When True, functions in this module emit verbose traces through logwrite.
DEBUG = False

# set_rate() multiplies the frame period by this factor when rate boost is off.
RATE_BOOST_MULTIPLIER = 1.5

# Table of the available Open JTalk voices.
# Reference values for "lf0_base":
# math.log(150) = 5.0, math.log(350) = 5.86
_jtalk_voices = [
    {
        "id": "V1",
        "name": "m001",
        "lang": "ja",
        "samp_rate": 48000,
        "fperiod": 240,
        "lf0_base": 5.0,
        "pitch_bias": 0,
        "speaker_attenuation": 1.0,
        "htsvoice": os.path.join(jtalk_dir, 'm001', 'm001.htsvoice'),
        "espeak_variant": "max",
    },
    {
        "id": "V2",
        "name": "mei",
        "lang": "ja",
        "samp_rate": 48000,
        "fperiod": 240,
        "lf0_base": 5.86,
        "pitch_bias": -10,
        "speaker_attenuation": 0.5,
        "htsvoice": os.path.join(jtalk_dir, 'mei', 'mei_normal.htsvoice'),
        "espeak_variant": "f1",
    },
    {
        "id": "V3",
        "name": "lite",
        "lang": "ja",
        "samp_rate": 16000,
        "fperiod": 80,
        "lf0_base": 5.0,
        "pitch_bias": 0,
        "speaker_attenuation": 1.0,
        "htsvoice": os.path.join(jtalk_dir, 'lite', 'voice.htsvoice'),
        "espeak_variant": "max",
    },
]

# Voice used by initialize() when the caller does not pass one.
default_jtalk_voice = _jtalk_voices[1]  # V2

# Entry of _jtalk_voices currently in use; assigned by initialize().
voice_args = None
66
class VoiceProperty(baseObject.AutoPropertyObject):
    """Holder for per-utterance voice settings.

    The class defines no attributes of its own; callers attach them.
    NOTE(review): _jtalk_speak() reads ``pitch``, ``inflection`` and
    ``characterMode`` from the property object it receives -- presumably
    an instance of this class; confirm against the synth driver.
    """

    def __init__(self):
        # Nothing to set up beyond the base class initialisation.
        super(VoiceProperty, self).__init__()
70
# Frame period handed to libjt_synthesis(); recomputed by set_rate().
# if samp_rate==16000: normal speed = 80samples period
fperiod = 240

# gain control (max_level and the thresholds are rewritten by set_volume();
# speaker_attenuation is taken from the selected voice in initialize())
max_level = 32000
thres_level = 128
thres2_level = 128
speaker_attenuation = 1.0

# Logging callable used for the DEBUG traces in this module.
logwrite = log.debug

# Speech index bookkeeping: currIndex is the index of the utterance being
# synthesized, lastIndex the index most recently recorded as reached.
# (The original assigned lastIndex twice; the duplicate has been dropped.)
currIndex = None
lastIndex = None

# nvwave.WavePlayer instance created by initialize(), cleared by terminate().
player = None

# Engine currently producing speech.
currentEngine = 0 # 0:none 1:espeak 2:jtalk
86
def isSpeaking():
    """Return the speaking flag stored on the background-thread module."""
    speaking = _bgthread.isSpeaking
    return speaking
89
def setSpeaking(b):
    """Store ``b`` as the speaking flag on the background-thread module."""
    setattr(_bgthread, 'isSpeaking', b)
92
def _jtalk_speak(msg, index=None, prop=None):
    """Synthesize ``msg`` with Open JTalk and feed the audio to ``player``.

    msg   -- text to speak; it is normalized, converted by jtalkPrepare and
             split on whitespace, and each piece is synthesized in turn.
    index -- speech index stored in ``currIndex`` while speaking and copied
             to ``lastIndex`` when the utterance finishes.
    prop  -- object providing ``characterMode``, ``pitch`` and
             ``inflection``; when None the call does nothing.

    Synthesis is abandoned as soon as isSpeaking() turns False (stop() clears
    the flag).  On that path the engine state is reset as on normal
    completion, except that ``lastIndex`` is left untouched.
    """
    global currIndex, currentEngine, lastIndex
    if prop is None:
        return
    currIndex = index
    # Character mode uses the voice's own frame period instead of the one
    # computed by set_rate().
    if prop.characterMode:
        fperiod_current = voice_args['fperiod']
    else:
        fperiod_current = fperiod
    msg = unicode_normalize(msg)
    msg = jtalkPrepare.convert(msg)
    # The synthesizer only receives a log callback when DEBUG is on.
    lw = logwrite if DEBUG else None
    setSpeaking(True)
    currentEngine = 2
    if DEBUG: logwrite("p:%d i:%d msg:%s" % (prop.pitch, prop.inflection, msg))
    level = int(max_level * speaker_attenuation)
    la = 0.020 * prop.inflection # 50 = original range
    ls = 0.015 * (prop.pitch - 50.0 + voice_args['pitch_bias']) # 50 = no shift
    lo = ls + voice_args['lf0_base'] * (1 - la)
    if DEBUG: logwrite("lo:%f la:%f" % (lo, la))
    for t in msg.split():
        if DEBUG: logwrite("unicode (%s)" % t)
        s = text2mecab(t)
        if DEBUG: logwrite("utf-8 (%s)" % s.decode('utf-8', 'ignore'))
        if not isSpeaking():
            # Cancelled: reset the synthesizer and the engine bookkeeping so
            # that pause()/stop() do not keep seeing jtalk as active.
            libjt_refresh()
            currIndex = None
            currentEngine = 0
            return
        mf = MecabFeatures()
        Mecab_analysis(s, mf, logwrite_=logwrite)
        if DEBUG: Mecab_print(mf, logwrite)
        Mecab_correctFeatures(mf)
        if DEBUG: Mecab_print(mf, logwrite)
        ar = Mecab_splitFeatures(mf, CODE_='utf-8')
        for a in ar:
            # Once cancelled, the remaining segments are skipped.
            if isSpeaking():
                if DEBUG: Mecab_print(a, logwrite, CODE_='utf-8')
                Mecab_utf8_to_cp932(a)
                if DEBUG: logwrite("Mecab_analysis done")
                libjt_synthesis(
                    a.feature,
                    a.size,
                    fperiod_ = fperiod_current,
                    feed_func_ = player.feed, # player.feed() is called inside
                    is_speaking_func_ = isSpeaking,
                    begin_thres_ = thres_level,
                    end_thres_ = thres2_level,
                    level_ = level,
                    logwrite_ = lw,
                    lf0_offset_ = lo,
                    lf0_amp_ = la)
                libjt_refresh()
                if DEBUG: logwrite("libjt_synthesis done")
            # Drop the references explicitly, as the original code did
            # (presumably to release the analysis buffers promptly -- confirm).
            del a
        del mf
    player.sync()
    lastIndex = currIndex
    currIndex = None
    setSpeaking(False)
    currentEngine = 0
152
# Name of the next <mark> appended to eSpeak input; incremented per utterance.
espeakMark = 10000

def _espeak_speak(msg, lang, index=None, prop=None):
    """Speak ``msg`` through eSpeak in language ``lang`` and wait for the end.

    msg   -- text to speak; converted to unicode and escaped before being
             wrapped in ``<voice>`` markup.
    lang  -- value placed in the ``xml:lang`` attribute.
    index -- speech index stored in ``lastIndex`` once the wait is over.
    prop  -- accepted for signature parity with _jtalk_speak(); unused.

    The call blocks until eSpeak reports the trailing mark or until
    ``currentEngine`` stops being 1 (stop() does that), calling
    watchdog.alive() while it waits.
    """
    global currentEngine, lastIndex, espeakMark
    currentEngine = 1
    msg = unicode(msg)
    # translate() returns a new string; the original discarded the result, so
    # '<', '>' and \x01 reached the markup unescaped.
    msg = msg.translate({ord(u'\01'):None,ord(u'<'):u'&lt;',ord(u'>'):u'&gt;'})
    msg = u"<voice xml:lang=\"%s\">%s</voice>" % (lang, msg)
    msg += u"<mark name=\"%d\" />" % espeakMark
    _espeak.speak(msg)
    # Poll until the mark appended above has been reached.
    while currentEngine == 1 and _espeak.lastIndex != espeakMark:
        time.sleep(0.1)
        watchdog.alive()
    time.sleep(0.4)
    watchdog.alive()
    lastIndex = index
    currentEngine = 0
    espeakMark += 1
171
172 # call from BgThread
def _speak(arg):
    """Dispatch one queued utterance to the engine matching its language.

    ``arg`` is the ``[msg, lang, index, prop]`` sequence queued by speak().
    Language 'ja' goes to Open JTalk; every other language goes to eSpeak.
    """
    msg, lang, index, prop = arg
    if DEBUG:
        logwrite('[' + lang + ']' + msg)
        logwrite("_speak(%s)" % msg)
    if lang != 'ja':
        _espeak_speak(msg, lang, index, prop)
        return
    _jtalk_speak(msg, index, prop)
181
182 # call from BgThread
def _updateSpeakIndex(index):
    """Record ``index`` as both the current and the last speech index."""
    global currIndex, lastIndex
    lastIndex = index
    currIndex = index
187
def speak(msg, lang, index=None, voiceProperty_=None):
    """Queue an utterance, or an index-only update, on the background thread.

    msg            -- text to speak; surrounding whitespace is stripped and
                      an empty result is ignored.
    lang           -- language code forwarded to _speak().
    index          -- speech index associated with the request.
    voiceProperty_ -- voice settings; deep-copied at queue time.  The request
                      is ignored when this is None.

    When both ``msg`` and ``lang`` are None, only ``index`` is recorded
    (through _updateSpeakIndex) once the queued work ahead of it is done.
    """
    index_only = msg is None and lang is None
    if index_only:
        _bgthread.execWhenDone(_updateSpeakIndex, index, mustBeAsync=True)
        return
    msg = msg.strip()
    if not msg:
        return
    if voiceProperty_ is None:
        return
    # Snapshot the settings so later changes do not affect this utterance.
    queued = [msg, lang, index, copy.deepcopy(voiceProperty_)]
    _bgthread.execWhenDone(_speak, queued, mustBeAsync=True)
197
def stop():
    """Cancel current and pending speech.

    When eSpeak is the active engine, only eSpeak is stopped.  Otherwise all
    queued _speak tasks are discarded (other queued items are kept), the
    speaking flag is cleared so a running synthesis aborts, playback is
    stopped and ``lastIndex`` is reset.
    """
    # lastIndex must be declared global: without it the assignment at the end
    # only bound a dead local and the module-level index was never reset.
    global currentEngine, lastIndex
    if currentEngine == 1:
        _espeak.stop()
        currentEngine = 0
        return
    # Kill all speech from now.
    # We still want parameter changes to occur, so requeue them.
    params = []
    stop_task_count = 0 # for the DEBUG log below
    try:
        while True:
            item = _bgthread.bgQueue.get_nowait() # [func, args, kwargs]
            if item[0] != _speak:
                params.append(item)
            else:
                stop_task_count += 1
            _bgthread.bgQueue.task_done()
    except Queue.Empty:
        # Let the exception break us out of this loop, as queue.empty() is not reliable anyway.
        pass
    for item in params:
        _bgthread.bgQueue.put(item)
    setSpeaking(False)
    if DEBUG: logwrite("stop: %d task(s) stopping" % stop_task_count)
    player.stop()
    lastIndex = None
225
def pause(switch):
    """Forward a pause request to whichever engine is currently speaking.

    ``switch`` is passed through unchanged to ``_espeak.pause`` (engine 1)
    or ``player.pause`` (engine 2); nothing happens when no engine is active.
    """
    engine = currentEngine
    if engine == 1:
        _espeak.pause(switch)
        return
    if engine == 2:
        player.pause(switch)
231
def initialize(voice = default_jtalk_voice):
    """Bring up eSpeak, the wave player, the worker thread, MeCab and Open JTalk.

    voice -- an entry of ``_jtalk_voices``; it becomes ``voice_args`` and
             supplies the attenuation, eSpeak variant, sample rate and
             htsvoice path used below.

    A missing htsvoice file is logged as an error rather than raised.
    """
    global player, voice_args, speaker_attenuation
    voice_args = voice
    speaker_attenuation = voice['speaker_attenuation']

    # eSpeak handles the non-Japanese text.
    if not _espeak.espeakDLL:
        _espeak.initialize()
        log.debug("jtalk using eSpeak version %s" % _espeak.info())
    _espeak.setVoiceByLanguage("en")
    _espeak.setVoiceAndVariant(variant=voice["espeak_variant"])

    # An existing player is reused as is.
    if not player:
        player = nvwave.WavePlayer(
            channels=1,
            samplesPerSec=voice['samp_rate'],
            bitsPerSample=16,
            outputDevice=config.conf["speech"]["outputDevice"])
    if not _bgthread.bgThread:
        _bgthread.initialize()
    # NOTE(review): ``mecab`` comes from ``from jtalkCore import *``, so this
    # tests the value bound at import time -- confirm that is intended.
    if not mecab:
        Mecab_initialize(log.info, jtalk_dir, dic_dir, user_dics)
    jtalkPrepare.setup()

    jt_dll = os.path.join(jtalk_dir, 'libopenjtalk.dll')
    log.debug('jt_dll %s' % jt_dll)
    libjt_initialize(jt_dll)
    log.debug(libjt_version())

    htsvoice_path = voice['htsvoice']
    if not os.path.isfile(htsvoice_path):
        log.error("load error " + htsvoice_path)
        return
    libjt_load(htsvoice_path)
    log.info("loaded " + htsvoice_path)
260
def terminate():
    """Shut the driver down: cancel speech, stop the worker, release audio.

    The wave player is closed and forgotten (so a later initialize() creates
    a new one) and eSpeak is terminated last.
    """
    global player
    stop()
    _bgthread.terminate()
    # Close the player before dropping the module-level reference.
    active_player, player = player, None
    try:
        active_player.close()
    except BaseException:
        # Same as the original: a failed close leaves ``player`` set.
        player = active_player
        raise
    _espeak.terminate()
268
# Rate most recently passed to set_rate().
rate_percent = 50

def get_rate(rateBoost):
    """Return the rate last stored by set_rate(); ``rateBoost`` is ignored."""
    current_rate = rate_percent
    return current_rate
273
def set_rate(rate, rateBoost):
    """Store ``rate`` and derive the synthesis frame period from it.

    rate      -- speech rate; stored as given in ``rate_percent`` and
                 converted with int() for the computation.
    rateBoost -- when false, the period is stretched by
                 RATE_BOOST_MULTIPLIER (a longer period means slower speech).

    Only the 16000 Hz and 48000 Hz voices have a mapping.  For any other
    sample rate ``fperiod`` is left unchanged; the original re-multiplied the
    stale value on every non-boost call.
    """
    global fperiod, rate_percent
    rate_percent = rate
    samp_rate = voice_args['samp_rate']
    if samp_rate == 16000:
        # Explicit floor division keeps the Python 2 integer result under any
        # division semantics.
        period = 80 - int(rate) // 2 # 80..30
    elif samp_rate == 48000:
        period = int(240 - 1.5 * int(rate)) # 240..90
    else:
        return
    if not rateBoost:
        period = int(period * RATE_BOOST_MULTIPLIER)
    fperiod = period
283
def set_volume(vol):
    """Map ``vol`` onto the synthesis output level and reset the thresholds.

    ``vol`` is converted with int(); 0 gives a level of 100 and 100 gives
    32767.  Both silence thresholds are set back to 128.
    """
    global max_level, thres_level, thres2_level
    volume = int(vol)
    max_level = int(326.67 * volume + 100) # 100..32767
    thres_level = thres2_level = 128
289