1 # roma_dic_maker.py for nvdajp_jtalk
\r
2 # -*- coding: utf-8 -*-
\r
3 # since 2011-04-06 by Takuya Nishimoto
\r
4 from __future__ import unicode_literals
\r
6 OUT_FILE = 'nvdajp-roma-dic.csv'
\r
12 from alpha2mb import alpha2mb
\r
15 # third item is number of morae
\r
240 def isGoodEntry(s):
\r
242 if a[0] == 'echo' and a[12] == 'エチョー':
\r
244 if a[0] == 'use' and a[12] == 'ウセー':
\r
246 if a[0] == 'one' and a[12] == 'オネー':
\r
250 def make_dic(CODE, THISDIR):
\r
251 with open(path.join(THISDIR, OUT_FILE), "w") as file:
\r
257 for p in [('a', 'ア'), ('i', 'イ'), ('u', 'ウ'), ('e', 'エ'), ('o', 'オ')]:
\r
258 k1 = k1 = alpha2mb(k.lower() + p[0])
\r
259 y = i[1] + p[1] + 'ー'
\r
260 pros = "%d/%d" % (0, i[2] + 2)
\r
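261 # surface form, left context ID, right context ID, cost, part of speech, POS subcategory 1, POS subcategory 2, POS subcategory 3, conjugation form, conjugation type, base form, reading, pronunciation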
\r
262 s = "%s,-1,-1,%.1f,名詞,一般,*,*,*,*,%s,%s,%s,%s,C0\n" % (k1,cost,k1,y,y,pros)
\r
263 if isGoodEntry(s): file.write(s.encode(CODE))
\r
265 for p in [('a', 'ア'), ('i', 'イ'), ('u', 'ウ'), ('e', 'エ'), ('o', 'オ')]:
\r
266 k1 = k1 = alpha2mb(p[0] + k.lower())
\r
267 y = p[1] + i[1] + 'ー'
\r
268 pros = "%d/%d" % (0, i[2] + 2)
\r
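269 # surface form, left context ID, right context ID, cost, part of speech, POS subcategory 1, POS subcategory 2, POS subcategory 3, conjugation form, conjugation type, base form, reading, pronunciation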
\r
270 s = "%s,-1,-1,%.1f,名詞,一般,*,*,*,*,%s,%s,%s,%s,C0\n" % (k1,cost,k1,y,y,pros)
\r
271 if isGoodEntry(s): file.write(s.encode(CODE))
\r
276 k1 = k1 = alpha2mb(k.lower() + 'x')
\r
278 pros = "%d/%d" % (0, i[2] + 4)
\r
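279 # surface form, left context ID, right context ID, cost, part of speech, POS subcategory 1, POS subcategory 2, POS subcategory 3, conjugation form, conjugation type, base form, reading, pronunciation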
\r
280 s = "%s,-1,-1,%.1f,名詞,一般,*,*,*,*,%s,%s,%s,%s,C0\n" % (k1,cost,k1,y,y,pros)
\r
281 if isGoodEntry(s): file.write(s.encode(CODE))
\r
286 k1 = k1 = alpha2mb(k.lower() + 'n')
\r
288 pros = "%d/%d" % (0, i[2] + 2)
\r
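289 # surface form, left context ID, right context ID, cost, part of speech, POS subcategory 1, POS subcategory 2, POS subcategory 3, conjugation form, conjugation type, base form, reading, pronunciation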
\r
290 s = "%s,-1,-1,%.1f,名詞,一般,*,*,*,*,%s,%s,%s,%s,C0\n" % (k1,cost,k1,y,y,pros)
\r
291 if isGoodEntry(s): file.write(s.encode(CODE))
\r
296 k1 = k1 = alpha2mb(k.lower())
\r
298 pros = "%d/%d" % (0, i[2] + 1)
\r
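299 # surface form, left context ID, right context ID, cost, part of speech, POS subcategory 1, POS subcategory 2, POS subcategory 3, conjugation form, conjugation type, base form, reading, pronunciation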
\r
300 s = "%s,-1,-1,%.1f,名詞,一般,*,*,*,*,%s,%s,%s,%s,C0\n" % (k1,cost,k1,y,y,pros)
\r
301 if isGoodEntry(s): file.write(s.encode(CODE))
\r
304 if __name__ == '__main__':
\r