OSDN Git Service

do clean after use of python-jtalk
[nvdajp/nvdajpmiscdep.git] / jptools / jtalkRunner.py
1 # jtalkRunner.py
2 # -*- coding: utf-8 -*-
3 # Japanese speech engine test module
4 # by Takuya Nishimoto
5 # http://ja.nishimotz.com/project:libopenjtalk
6 # Usage:
7 # > python jtalkRunner.py
8 # requires pyaudio (PortAudio wrapper)
9 # http://people.csail.mit.edu/hubert/pyaudio/
10
11 from __future__ import unicode_literals, print_function
12 import os
13 import sys
14 import wave
15 import time
16 import pyaudio
17 import cProfile
18 import pstats
# Directory of the jtalk synth driver, resolved relative to the current
# working directory (so the script is presumably meant to be started from
# its own folder -- see the usage note in the file header).
# os.getcwdu() exists only on Python 2; os.getcwd() is used as the fallback
# because it already returns text on Python 3.
JT_DIR = os.path.normpath(
        os.path.join(
                getattr(os, 'getcwdu', os.getcwd)(),
                '..', 'source', 'synthDrivers', 'jtalk')
        )
# Make the modules that live in JT_DIR importable.
sys.path.append(JT_DIR)
23 from jtalkCore import *
24 import jtalkPrepare
25
# Path of the Open JTalk shared library that main() hands to
# libjt_initialize().
JT_DLL = os.path.join(JT_DIR, 'libopenjtalk.dll')

# Voice definitions. main() picks one entry, loads its "htsvoice" file and
# passes the whole dict to do_synthesis(), which reads "samp_rate",
# "speaker_attenuation", "lf0_base" and "pitch_bias".
# Every entry therefore carries "pitch_bias"; 0 means no extra pitch shift.
voices = [
        {"id": "V1",
         "name": "m1",
         "lang":"ja",
         "samp_rate": 48000,
         "fperiod": 240,
         "lf0_base":5.0,
         "pitch_bias": 0,
         "speaker_attenuation":1.0,
         "htsvoice": os.path.join(JT_DIR, 'm001', 'm001.htsvoice'),
         #"espeak_variant": "max",
         },
        {"id": "V2",
         "name": "mei",
         "lang":"ja",
         "samp_rate": 48000,
         "fperiod": 240,
         "lf0_base": 5.86,
         "pitch_bias": -10,
         "speaker_attenuation": 0.5,
         "htsvoice": os.path.join(JT_DIR, 'mei', 'mei_normal.htsvoice'),
         #"espeak_variant": "f1",
         },
        {"id": "V3",
         "name": "lite",
         "lang":"ja",
         "samp_rate": 16000,
         "fperiod": 80,
         "lf0_base": 5.0,
         "pitch_bias": 0,
         "speaker_attenuation": 1.0,
         "htsvoice": os.path.join(JT_DIR, 'lite', 'voice.htsvoice'),
         #"espeak_variant": "max",
         },
        ]
62
def pa_play(data, samp_rate = 16000):
        """Play raw audio data on the default PyAudio output stream.

        The stream is opened with one channel and the sample format PyAudio
        reports for a sample width of 2 bytes. The stream and the PyAudio
        instance are released even if writing fails.

        Args:
                data: audio bytes to play.
                samp_rate: sampling rate passed to the output stream.
        """
        p = pyaudio.PyAudio()
        try:
                stream = p.open(format = p.get_format_from_width(2),
                        channels = 1, rate = samp_rate, output = True)
                try:
                        size = len(data)
                        pos = 0 # byte count
                        while pos < size:
                                # get_write_available() reports frames; a frame is
                                # 2 bytes for this stream, hence the factor.
                                a = stream.get_write_available() * 2
                                stream.write(data[pos:pos+a])
                                pos += a
                        # Sleep for the clip's playing time (bytes / 2 / rate)
                        # before closing -- presumably so that buffered audio is
                        # not cut off.
                        time.sleep(float(size) / 2 / samp_rate)
                finally:
                        stream.close()
        finally:
                p.terminate()
77
def __print(s):
        """Print s encoded as cp932, dropping characters cp932 cannot encode."""
        encoded = s.encode('cp932', 'ignore')
        print(encoded)
80
def print_code(msg):
        """Print the code point of every character in msg.

        Each character is shown as at least four lowercase hex digits followed
        by a single space, all on one line.
        """
        print(''.join('%04x ' % ord(ch) for ch in msg))
86
def _write_wav(path, data, samp_rate):
        """Write data to path as a 1-channel WAV file with 2-byte samples."""
        out = wave.open(path, 'wb')
        try:
                # Floor division keeps the frame count an integer even when
                # "/" is true division.
                out.setparams( (1, 2, samp_rate, len(data) // 2,
                                'NONE', 'not compressed') )
                out.writeframes(data)
        finally:
                out.close()

def do_synthesis(msg, voice_args, do_play, do_write, do_log, fperiod, pitch=50, inflection=50, vol=50):
        """Run the text-to-speech pipeline on msg and optionally play/save it.

        msg is converted with jtalkPrepare.convert() and text2mecab(), analysed
        with Mecab_analysis(), corrected, and split with Mecab_splitFeatures().
        Each resulting part is synthesized with libjt_synthesis(); intermediate
        results are reported through __print.

        Args:
                msg: text to speak.
                voice_args: voice dict; "speaker_attenuation", "lf0_base" and
                        "samp_rate" are required, "pitch_bias" defaults to 0.
                do_play: play each synthesized part with pa_play().
                do_write: write each part to "_test<N>.wav" and pass
                        "_test<N>.jt.wav" to libjt_synthesis().
                do_log: pass "_test<N>.jtlog" to libjt_synthesis().
                fperiod: frame period forwarded to libjt_synthesis().
                pitch: pitch setting; 50 means no shift.
                inflection: inflection setting; 50 keeps the original range.
                vol: volume setting, mapped linearly to the output level.
        """
        msg = jtalkPrepare.convert(msg)
        s = text2mecab(msg)
        __print("utf-8: (%s)" % s.decode('utf-8', 'ignore'))
        mf = MecabFeatures()
        Mecab_analysis(s, mf)
        Mecab_print(mf, __print)
        Mecab_correctFeatures(mf)
        ar = Mecab_splitFeatures(mf)
        __print('array size %d' % len(ar))
        max_level = int(326.67 * int(vol) + 100) # 100..32767
        level = int(max_level * voice_args['speaker_attenuation'])
        lf0_amp = 0.020 * inflection # 50 = original range
        # Not every voice definition carries a pitch bias; treat a missing one
        # as "no bias" instead of failing with KeyError.
        pitch_bias = voice_args.get('pitch_bias', 0)
        ls = 0.015 * (pitch - 50.0 + pitch_bias) # 50 = no shift
        lf0_offset = ls + voice_args['lf0_base'] * (1 - lf0_amp)
        samp_rate = voice_args['samp_rate']
        for count, a in enumerate(ar, 1):
                __print('feature size %d' % a.size)
                Mecab_print(a, __print)
                Mecab_utf8_to_cp932(a)
                # Per-part output files requested from the synthesizer itself.
                jt_wav_name = "_test%d.jt.wav" % count if do_write else None
                jt_log_name = "_test%d.jtlog" % count if do_log else None
                data = libjt_synthesis(a.feature,
                                       a.size,
                                       begin_thres_=32,
                                       end_thres_=32,
                                       level_=level,
                                       fperiod_ = fperiod,
                                       lf0_offset_ = lf0_offset,
                                       lf0_amp_ = lf0_amp,
                                       logwrite_ = __print,
                                       jtlogfile_ = jt_log_name,
                                       jtwavfile_ = jt_wav_name)
                if data:
                        __print('data size %d' % len(data))
                        if do_play:
                                pa_play(data, samp_rate = samp_rate)
                        if do_write:
                                _write_wav("_test%d.wav" % count, data, samp_rate)
                libjt_refresh()
                # Drop the local references once a part / the analysis is done.
                del a
        del mf
140
def main(do_play = False, do_write = True, do_log = False):
        """Initialise the engine with one voice and synthesize a sample text.

        Args:
                do_play: forwarded to do_synthesis() (play the result).
                do_write: forwarded to do_synthesis() (write WAV files).
                do_log: forwarded to do_synthesis() (write log files).
        """
        # These objects are created but not referenced again in this function.
        # NOTE(review): confirm whether constructing them is required before
        # libjt_initialize().
        njd, jpcommon, engine = NJD(), JPCommon(), HTS_Engine()
        libjt_initialize(JT_DLL)

        # The second voice definition is the one used for this run.
        voice = voices[1]
        libjt_load(voice['htsvoice'])
        Mecab_initialize(__print, JT_DIR, os.path.join(JT_DIR, 'dic'))

        # Sample texts; only the first one is synthesized.
        samples = [
                '100.25ドル。ウェルカムトゥー nvda テンキーのinsertキーと、メインのinsertキーの両方が、nvdaキーとして動作します',
                'マーク。まーく。',
                ]
        text = samples[0]
        do_synthesis(text, voice, do_play, do_write, do_log, voice['fperiod'],
                pitch=50, inflection=50)
157
# Script entry point: run main() with playback disabled and WAV writing
# enabled.
if __name__ == '__main__':
        main(do_play=False, do_write=True)
        # Disabled profiling run: executes main() with playback under cProfile,
        # saves the statistics to '_cprof.prof' and prints them sorted by time
        # and call count.
        #prof = cProfile.run("main(do_play=True)", '_cprof.prof')
        #p = pstats.Stats('_cprof.prof')
        #p.strip_dirs()
        #p.sort_stats('time', 'calls')
        #p.print_stats()