OSDN Git Service

work around ti34107
[nvdajp/nvdajpmiscdep.git] / jptools / mecabRunner.py
1 # mecabRunner.py \r
2 # -*- coding: utf-8 -*-\r
3 # Japanese text processor test module\r
4 # by Takuya Nishimoto\r
5 \r
6 from __future__ import unicode_literals\r
7 import os\r
8 import sys\r
9 from glob import glob\r
# Directory of the jtalk synthesizer driver, resolved relative to the current
# working directory. It is appended to sys.path so that the modules imported
# below (mecab, jtalkDir) can be found there.
jt_dir = os.path.join(os.getcwdu(), '..', 'source', 'synthDrivers', 'jtalk')
jt_dir = os.path.normpath(jt_dir)
sys.path.append(jt_dir)
14 from mecab import *\r
15 from mecabHarness import tasks\r
16 import jtalkDir\r
17 \r
# System dictionary directory inside the jtalk driver directory, plus the
# user dictionary settings re-exported from jtalkDir.
dic = os.path.join(jt_dir, 'dic')
user_dics_org, user_dics = jtalkDir.user_dics_org, jtalkDir.user_dics
21 \r
22 def __print(s):\r
23         print s.encode('utf-8', 'ignore')\r
24 \r
# Module-level text accumulator written to by __print_dummy.
buffer = ''

def __print_dummy(s):
        """Append *s* plus a newline to the module-level ``buffer``.

        Has the same call shape as __print, but collects the text instead of
        writing it to standard output.
        """
        global buffer
        line = s + '\n'
        buffer = buffer + line
30 \r
def Mecab_get_reading(mf, CODE_=CODE):
        """Collect the reading and braille strings from analyzed features.

        For every position in ``mf`` the feature string returned by
        Mecab_getFeature is split on commas:

        * reading piece: field 9 when more than 9 fields exist (with U+3000
          replaced by an ASCII space); otherwise field 0, unless field 0 is
          the character 'ー', in which case the piece is empty.
        * braille piece: field 12 when more than 12 fields exist; otherwise
          the reading piece is reused.

        Returns a tuple ``(reading, braille)``. Reading pieces are simply
        concatenated; braille pieces are each followed by '/', and trailing
        spaces and slashes are stripped from the final braille string.
        """
        reading_parts = []
        braille_parts = []
        for pos in xrange(mf.size):
                fields = Mecab_getFeature(mf, pos, CODE_=CODE_).split(',')
                if len(fields) > 9:
                        rd = fields[9].replace('\u3000', ' ')
                elif fields[0] != 'ー':
                        rd = fields[0]
                else:
                        rd = ''
                reading_parts.append(rd)
                braille_parts.append(fields[12] if len(fields) > 12 else rd)
        braille = ''.join(piece + r"/" for piece in braille_parts)
        return (''.join(reading_parts), braille.rstrip(r" /"))
47 \r
def get_reading(msg):
        """Analyze *msg* and return the result of Mecab_get_reading for it.

        The text is converted with text2mecab, analyzed into a fresh
        MecabFeatures object and passed through Mecab_correctFeatures. The
        feature dump produced by Mecab_print goes to __print_dummy, so it is
        appended to the module-level buffer instead of being printed.
        """
        converted = text2mecab(msg)
        features = MecabFeatures()
        Mecab_analysis(converted, features)
        Mecab_correctFeatures(features)
        Mecab_print(features, __print_dummy)
        outcome = Mecab_get_reading(features)
        # Drop the local reference before returning.
        # NOTE(review): presumably lets the feature object be released
        # promptly -- confirm whether MecabFeatures depends on this.
        features = None
        return outcome
57 \r
58 def runTasks(enableUserDic=False):\r
59         if enableUserDic:\r
60                 print jt_dir, dic, user_dics\r
61                 Mecab_initialize(__print, jt_dir, dic, user_dics)\r
62         else:\r
63                 print jt_dir, dic\r
64                 Mecab_initialize(__print, jt_dir, dic)\r
65         count = 0\r
66         for i in tasks:\r
67                 if isinstance(i, dict):\r
68                         if 'braille' in i:\r
69                                 if 'speech' in i:\r
70                                         item = [ i['text'], i['speech'], i['braille'] ]\r
71                                 else:\r
72                                         s = i['braille'].replace(' ', '').replace('/', '')\r
73                                         item = [ i['text'], s, i['braille'] ]\r
74                         elif 'input' in i:\r
75                                 if 'speech' in i:\r
76                                         item = [ i['text'], i['speech'], i['input'] ]\r
77                                 else:\r
78                                         s = i['input'].replace(' ', '').replace('/', '')\r
79                                         item = [ i['text'], s, i['input'] ]\r
80                         else:\r
81                                 item = [ i['text'], i['speech'] ]\r
82                 else:\r
83                         item = i\r
84                 buffer = ''\r
85                 result = get_reading(item[0])\r
86                 if item[1] is not None and result[0] != item[1]:\r
87                         __print('')\r
88                         __print('')\r
89                         __print(buffer)\r
90                         __print('input:    ' + item[0])\r
91                         __print('reading expected: ' + item[1])\r
92                         __print('reading result:   ' + result[0])\r
93                         count += 1\r
94                 if len(item) > 2 and result[1] != item[2]:\r
95                         __print('')\r
96                         __print('')\r
97                         __print(buffer)\r
98                         __print('input:            ' + item[0])\r
99                         __print('braille expected: ' + item[2])\r
100                         __print('braille result:   ' + result[1])\r
101                         count += 1\r
102 \r
103         return count\r
104 \r
if __name__ == '__main__':
        # Script entry point: run the harness with the user dictionaries
        # enabled. The mismatch count returned by runTasks is not used here.
        runTasks(True)