2 # -*- coding: utf-8 -*-
3 # Japanese speech engine test module
5 # http://ja.nishimotz.com/project:libopenjtalk
7 # > python jtalkRunner.py
8 # requires pyaudio (PortAudio wrapper)
9 # http://people.csail.mit.edu/hubert/pyaudio/
11 from __future__ import unicode_literals, print_function
19 JT_DIR = os.path.normpath(
# Driver directory, resolved relative to the current working directory:
# ../source/synthDrivers/jtalk
# NOTE(review): os.getcwdu() exists only on Python 2 - confirm the target
# interpreter before porting.
20 os.path.join(os.getcwdu(), '..', 'source', 'synthDrivers', 'jtalk')
# Make the driver directory importable so jtalkCore can be found.
22 sys.path.append(JT_DIR)
23 from jtalkCore import *
# Engine DLL; passed to libjt_initialize() in main().
26 JT_DLL = os.path.join(JT_DIR, 'libopenjtalk.dll')
# Per-voice settings follow. In each entry:
#   "speaker_attenuation" multiplies the output level in do_synthesis();
#   "htsvoice" is the voice file under JT_DIR, handed to libjt_load().
35 "speaker_attenuation":1.0,
36 "htsvoice": os.path.join(JT_DIR, 'm001', 'm001.htsvoice'),
37 #"espeak_variant": "max",
46 "speaker_attenuation": 0.5,
47 "htsvoice": os.path.join(JT_DIR, 'mei', 'mei_normal.htsvoice'),
48 #"espeak_variant": "f1",
57 "speaker_attenuation": 1.0,
58 "htsvoice": os.path.join(JT_DIR, 'lite', 'voice.htsvoice'),
59 #"espeak_variant": "max",
63 def pa_play(data, samp_rate = 16000):
# Play raw audio `data` on an output stream opened through `p`.
#   data      -- audio buffer to play
#   samp_rate -- sampling rate in Hz used for the stream (default 16000)
# NOTE(review): `p` and `size` are bound on lines not shown here; `p` is
# presumably a pyaudio.PyAudio instance - confirm.
# Mono output stream at `samp_rate`, using the format for 2-byte samples.
65 stream = p.open(format = p.get_format_from_width(2),
66 channels = 1, rate = samp_rate, output = True)
# Writable space reported by the stream, doubled; presumably a
# frames-to-bytes conversion for 2-byte mono samples - confirm.
70 a = stream.get_write_available() * 2
# size / 2 / samp_rate is the duration in seconds of `size` bytes of
# 2-byte mono audio, so this sleeps for about that long.
74 time.sleep(float(size) / 2 / samp_rate)
79 print(s.encode('cp932', 'ignore'))
87 def do_synthesis(msg, voice_args, do_play, do_write, do_log, fperiod, pitch=50, inflection=50, vol=50):
# Run text `msg` through text preparation, MeCab analysis and
# libjt_synthesis, printing diagnostics via __print along the way.
#   msg        -- text to synthesize
#   voice_args -- voice settings; the visible code reads 'speaker_attenuation',
#                 'pitch_bias', 'lf0_base' and 'samp_rate'
#   do_play, do_write, do_log -- output switches; presumably they gate the
#                 pa_play(), wave-file and log-file steps below - confirm
#   fperiod    -- not used on the visible lines; presumably forwarded to
#                 libjt_synthesis - confirm
#   pitch, inflection, vol -- 50 is the neutral setting for pitch and
#                 inflection (see the formulas below)
# Normalize the input text before analysis.
88 msg = jtalkPrepare.convert(msg)
# NOTE(review): `s`, `mf`, `a` and `count` are bound on lines not shown here.
90 __print("utf-8: (%s)" % s.decode('utf-8', 'ignore'))
# Dump the MeCab result, correct its features, then split it into `ar`.
93 Mecab_print(mf, __print)
94 Mecab_correctFeatures(mf)
95 ar = Mecab_splitFeatures(mf)
96 __print('array size %d' % len(ar))
# Map vol linearly onto an output level: vol=0 gives 100, vol=100 gives 32767.
97 max_level = int(326.67 * int(vol) + 100) # 100..32767
# Scale the level by the per-voice attenuation factor.
98 level = int(max_level * voice_args['speaker_attenuation'])
# inflection=50 yields lf0_amp == 1.0.
99 lf0_amp = 0.020 * inflection # 50 = original range
# pitch=50 with a zero pitch_bias yields no shift (ls == 0).
100 ls = 0.015 * (pitch - 50.0 + voice_args['pitch_bias']) # 50 = no shift
# At lf0_amp == 1.0 the lf0_base term vanishes and only the shift remains.
101 lf0_offset = ls + voice_args['lf0_base'] * (1 - lf0_amp)
# `a` is presumably one element of `ar` - confirm. Its features are printed
# and then converted from UTF-8 to cp932 before synthesis.
105 __print('feature size %d' % a.size)
106 Mecab_print(a, __print)
107 Mecab_utf8_to_cp932(a)
# Per-item output names, numbered by `count`.
109 w = "_test%d.jt.wav" % count
113 l = "_test%d.jtlog" % count
# Synthesize audio for this feature set; lf0_offset carries the pitch and
# inflection adjustment computed above.
116 data = libjt_synthesis(a.feature,
122 lf0_offset_ = lf0_offset,
128 __print('data size %d' % len(data))
# Play the result at the voice's sampling rate.
130 pa_play(data, samp_rate = voice_args['samp_rate'])
# Write the result as a WAV file: 1 channel, 2 bytes per sample, hence
# len(data)/2 frames; no compression.
132 w = wave.Wave_write("_test%d.wav" % count)
133 w.setparams( (1, 2, voice_args['samp_rate'], len(data)/2,
134 'NONE', 'not compressed') )
141 def main(do_play = False, do_write = True, do_log = False):
# Test driver: initialize the engine, load a voice, set up MeCab and
# synthesize sample text.
#   do_play, do_write, do_log -- passed straight through to do_synthesis()
# NOTE(review): `v` (a voice settings dict) and `s` (the text being
# synthesized) are bound on lines not shown here.
143 jpcommon = JPCommon()
144 engine = HTS_Engine()
# Load the engine DLL, then the selected voice file.
145 libjt_initialize(JT_DLL)
147 libjt_load(v['htsvoice'])
# MeCab is given __print, the driver directory and its 'dic' subdirectory.
148 Mecab_initialize(__print, JT_DIR, os.path.join(JT_DIR, 'dic'))
# Sample input mixing digits, Latin words and Japanese text.
151 '100.25ドル。ウェルカムトゥー nvda テンキーのinsertキーと、メインのinsertキーの両方が、nvdaキーとして動作します',
# The frame period comes from the voice settings; pitch and inflection are
# left at their neutral value of 50.
155 fperiod = v['fperiod']
156 do_synthesis(s, v, do_play, do_write, do_log, fperiod, pitch=50, inflection=50)
158 if __name__ == '__main__':
# Script entry point: run without audio playback, writing output files only.
159 main(do_play=False, do_write=True)
# Optional profiling run, kept disabled.
160 #prof = cProfile.run("main(do_play=True)", '_cprof.prof')
161 #p = pstats.Stats('_cprof.prof')
163 #p.sort_stats('time', 'calls')