+++ /dev/null
-from synthDriverHandler import SynthDriver\r
class SynthDriver(SynthDriver):
    """Driver stub whose availability check always fails."""

    @classmethod
    def check(cls):
        """Return False unconditionally."""
        available = False
        return available
+++ /dev/null
-# _bgthread.py \r
-# -*- coding: utf-8 -*-\r
-#A part of NonVisual Desktop Access (NVDA)\r
-#Copyright (C) 2006-2010 NVDA Contributors <http://www.nvda-project.org/>\r
-#Copyright (C) 2010-2012 Takuya Nishimoto (nishimotz.com)\r
-#Copyright (C) 2013 Masamitsu Misono (043.jp)\r
-#This file is covered by the GNU General Public License.\r
-#See the file COPYING for more details.\r
-#\r
-# based on NVDA (synthDrivers/_espeak.py)\r
-\r
-from logHandler import log\r
-import threading\r
-import Queue\r
-\r
# Worker thread and its job queue; both stay None until initialize() runs.
bgThread = bgQueue = None
# Reset to False by the worker after every job it executes.
isSpeaking = False
-\r
class BgThread(threading.Thread):
    """Daemon worker that executes queued ``(func, args, kwargs)`` jobs in order.

    Jobs are read from the module-level ``bgQueue``.  A job whose ``func`` is
    falsy, e.g. ``(None, None, None)``, makes the worker leave its loop.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        # Attribute form of the deprecated setDaemon(True).
        self.daemon = True

    def run(self):
        """Process jobs until the stop sentinel is received."""
        global isSpeaking
        while True:
            func, args, kwargs = bgQueue.get()
            if not func:
                break
            try:
                func(*args, **kwargs)
            except Exception:
                # A failing job must not kill the worker.  SystemExit and
                # KeyboardInterrupt are deliberately no longer swallowed.
                log.error("Error running function from queue", exc_info=True)
            finally:
                # Runs after every job, successful or not.
                isSpeaking = False
                bgQueue.task_done()
-\r
def execWhenDone(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` now if the worker is idle, else queue it.

    The keyword ``mustBeAsync`` (default False) is removed from ``kwargs``
    before ``func`` sees them; when true the call is always queued.
    """
    # Taken from kwargs rather than declared as a parameter: a declared
    # parameter would consume the first positional argument meant for func.
    force_queue = kwargs.pop("mustBeAsync", False)
    if not force_queue and bgQueue.unfinished_tasks == 0:
        # Nothing pending and no async requirement: run synchronously.
        func(*args, **kwargs)
        return
    # Either asynchronous execution was requested or a job is still in
    # progress, so hand the call to the background thread.
    bgQueue.put((func, args, kwargs))
-\r
def initialize():
    """Create the job queue, then create and start the worker thread."""
    global bgThread, bgQueue
    # The queue must exist before the worker starts reading from it.
    bgQueue = Queue.Queue()
    bgThread = worker = BgThread()
    worker.start()
-\r
def terminate():
    """Stop the background worker and discard the queue.

    Safe to call when initialize() has not run or terminate() has already
    run: in that case nothing is stopped instead of raising AttributeError
    on None.
    """
    global bgThread, bgQueue
    if bgQueue is not None and bgThread is not None:
        # A job with a falsy func is the worker's stop sentinel.
        bgQueue.put((None, None, None))
        bgThread.join()
    bgThread = None
    bgQueue = None
+++ /dev/null
-# _jtalk_core.py
-# -*- coding: utf-8 -*-
-#A part of NonVisual Desktop Access (NVDA)
-#Copyright (C) 2010-2012 Takuya Nishimoto (NVDA Japanese Team)
-#This file is covered by the GNU General Public License.
-#See the file COPYING for more details.
-
-# Japanese speech engine wrapper for Open JTalk
-# http://ja.nishimotz.com/project:libopenjtalk
-
-import codecs
-import re
-import string
-import os
-import struct
-import sys
-from mecab import *
-
-############################################
-
-# htsengineapi/include/HTS_engine.h
-
-# size of structure:
-# HTS_Global 56
-# HTS_ModelSet 76
-# HTS_Label 24
-# HTS_SStreamSet 24
-# HTS_PStreamSet 12
-# HTS_GStreamSet 20
-
class HTS_ModelSet(Structure):
    """Opaque placeholder for the C ``HTS_ModelSet``: 56 reserved bytes."""
    # NOTE(review): the size table in the comment above this class lists
    # HTS_ModelSet as 76 bytes, not 56 -- confirm against HTS_engine.h.
    _fields_ = [("_dummy", c_byte * 56)]
-
class HTS_Label(Structure):
    """Opaque placeholder for the C ``HTS_Label``: 76 reserved bytes."""
    # NOTE(review): the size table in the comment above lists HTS_Label as
    # 24 bytes, not 76 -- confirm against HTS_engine.h.
    _fields_ = [("_dummy", c_byte * 76)]


HTS_Label_ptr = POINTER(HTS_Label)
-
class HTS_SStreamSet(Structure):
    """Opaque placeholder for the C ``HTS_SStreamSet``: 24 reserved bytes."""
    _fields_ = [("_dummy", c_byte * 24)]
-
class HTS_PStreamSet(Structure):
    """Opaque placeholder for the C ``HTS_PStreamSet``: 12 reserved bytes."""
    _fields_ = [("_dummy", c_byte * 12)]
-
class HTS_GStream(Structure):
    """ctypes layout of the C ``HTS_GStream`` (one generated parameter stream)."""
    _fields_ = [
        # int static_length;  /* static features length */
        ("static_length", c_int),
        # double **par;       /* generated parameter */
        ("par", c_double_p_p),
    ]


HTS_GStream_ptr = POINTER(HTS_GStream)
-
-# FIXME: engine.gss.total_nsample is always 0
class HTS_GStreamSet(Structure):
    """ctypes layout of the C ``HTS_GStreamSet`` (generated streams and speech)."""
    _fields_ = [
        # int total_nsample;     /* total sample */
        ("total_nsample", c_int),
        # int total_frame;       /* total frame */
        ("total_frame", c_int),
        # int nstream;           /* # of streams */
        ("nstream", c_int),
        # HTS_GStream *gstream;  /* generated parameter streams */
        ("gstream", HTS_GStream_ptr),
        # short *gspeech;        /* generated speech */
        ("gspeech", c_short_p),
    ]


HTS_GStreamSet_ptr = POINTER(HTS_GStreamSet)
-
class HTS_Global(Structure):
    """ctypes layout of the C ``HTS_Global`` settings structure."""
    # NOTE(review): the first member is described as "stage" in its own
    # comment but is exposed here as "state" -- confirm before renaming.
    _fields_ = [
        # Gamma=-1/stage : if stage=0 then Gamma=0
        ("state", c_int),
        # HTS_Boolean (TRUE=1): log gain flag (for LSP)
        ("use_log_gain", c_int),
        # sampling rate
        ("sampling_rate", c_int),
        # frame period
        ("fperiod", c_int),
        # all-pass constant
        ("alpha", c_double),
        # postfiltering coefficient
        ("beta", c_double),
        # audio buffer size (for audio device)
        ("audio_buff_size", c_int),
        # MSD thresholds
        ("msd_threshold", c_double_p),
        # weights for duration interpolation
        ("duration_iw", c_double_p),
        # weights for parameter interpolation
        ("parameter_iw", c_double_p_p),
        # weights for GV interpolation
        ("gv_iw", c_double_p_p),
        # GV weights
        ("gv_weight", c_double_p),
    ]


HTS_Global_ptr = POINTER(HTS_Global)
-
class HTS_Engine(Structure):
    """ctypes layout of the C ``HTS_Engine``, built from the structures above."""
    _fields_ = [
        ("global", HTS_Global),
        # HTS_Audio (requires nvdajp miscdep 86 or later)
        ("audio", c_void_p),
        ("ms", HTS_ModelSet),
        ("label", HTS_Label),
        ("sss", HTS_SStreamSet),
        ("pss", HTS_PStreamSet),
        ("gss", HTS_GStreamSet),
        ("lf0_offset", c_double),
        ("lf0_amp", c_double),
    ]


HTS_Engine_ptr = POINTER(HTS_Engine)
-
-############################################
-
class NJD(Structure):
    """Opaque placeholder for the C ``NJD`` structure: 8 reserved bytes."""
    _fields_ = [("_dummy", c_byte * 8)]


NJD_ptr = POINTER(NJD)
-
class JPCommonNode(Structure):
    """Doubly linked node; its fields are attached after the pointer type exists."""


JPCommonNode_ptr = POINTER(JPCommonNode)
# The fields refer to JPCommonNode_ptr, so they can only be assigned once
# both the class and its pointer type have been created.
JPCommonNode._fields_ = [
    ('pron', c_char_p),
    ('pos', c_char_p),
    ('ctype', c_char_p),
    ('cform', c_char_p),
    ('acc', c_int),
    ('chain_flag', c_int),
    ('prev', JPCommonNode_ptr),
    ('next', JPCommonNode_ptr),
]
-
class JPCommonLabelBreathGroup(Structure):
    """Forward declaration only; no fields are defined here."""


JPCommonLabelBreathGroup_ptr = POINTER(JPCommonLabelBreathGroup)


class JPCommonLabelAccentPhrase(Structure):
    """Forward declaration only; no fields are defined here."""


JPCommonLabelAccentPhrase_ptr = POINTER(JPCommonLabelAccentPhrase)


class JPCommonLabelWord(Structure):
    """Forward declaration only; no fields are defined here."""


JPCommonLabelWord_ptr = POINTER(JPCommonLabelWord)


class JPCommonLabelMora(Structure):
    """Forward declaration only; no fields are defined here."""


JPCommonLabelMora_ptr = POINTER(JPCommonLabelMora)


class JPCommonLabelPhoneme(Structure):
    """Forward declaration only; no fields are defined here."""


JPCommonLabelPhoneme_ptr = POINTER(JPCommonLabelPhoneme)
-
-# jpcommon/jpcommon.h
class JPCommonLabel(Structure):
    """ctypes layout of the C ``JPCommonLabel`` (see jpcommon/jpcommon.h)."""
    _fields_ = [
        ('size', c_int),
        ('feature', c_char_p_p),
        # head/tail pointer pairs, one pair per element type
        ('breath_head', JPCommonLabelBreathGroup_ptr),
        ('breath_tail', JPCommonLabelBreathGroup_ptr),
        ('accent_head', JPCommonLabelAccentPhrase_ptr),
        ('accent_tail', JPCommonLabelAccentPhrase_ptr),
        ('word_head', JPCommonLabelWord_ptr),
        ('word_tail', JPCommonLabelWord_ptr),
        ('mora_head', JPCommonLabelMora_ptr),
        ('mora_tail', JPCommonLabelMora_ptr),
        ('phoneme_head', JPCommonLabelPhoneme_ptr),
        ('phoneme_tail', JPCommonLabelPhoneme_ptr),
        ('short_pause_flag', c_int),
    ]


JPCommonLabel_ptr = POINTER(JPCommonLabel)
-
class JPCommon(Structure):
    """ctypes layout of the C ``JPCommon``: node list ends plus a label pointer."""
    _fields_ = [
        ("head", JPCommonNode_ptr),
        ("tail", JPCommonNode_ptr),
        ("label", JPCommonLabel_ptr),
    ]


JPCommon_ptr = POINTER(JPCommon)
-
-# for debug
def JPC_label_print(feature, size, logwrite_):
    """Send the strings in ``feature`` to ``logwrite_`` (debugging aid).

    feature   -- indexable collection of C string pointers, or None
    size      -- number of entries to read from ``feature``, or None
    logwrite_ -- callable taking one string; None turns the call into a no-op

    The whole report goes out in a single ``logwrite_`` call, one entry per
    line.  Empty entries are shown as ``[None]``; that placeholder is now
    newline-terminated like every other entry.
    """
    if logwrite_ is None:
        return
    if feature is None or size is None:
        logwrite_("JPC_label_print size: 0")
        return
    lines = ["JPC_label_print size: %d" % size]
    for i in range(size):
        s = string_at(feature[i])
        lines.append(("%s" % s) if s else "[None]")
    # join() instead of repeated += keeps the build linear.
    logwrite_("\n".join(lines) + "\n")
-
-#############################################
-
# Fixed-size path buffer type and the pointer shapes derived from it.
FNLEN = 1000
# char[FNLEN]
FILENAME = c_char * FNLEN
# pointer to one path buffer, and pointer to such a pointer
FILENAME_ptr = POINTER(FILENAME)
FILENAME_ptr_ptr = POINTER(FILENAME_ptr)
# array of three path-buffer pointers, and pointer to that array
FILENAME_ptr_x3 = FILENAME_ptr * 3
FILENAME_ptr_x3_ptr = POINTER(FILENAME_ptr_x3)
-
# Library handle; stays None until libjt_initialize() loads the DLL.
libjt = None
# Native state objects shared by every function in this module.
njd = NJD()
jpcommon = JPCommon()
engine = HTS_Engine()
# Overwritten by libjt_initialize() with the voice's 'use_lpf' setting.
use_lpf = 0
-
def libjt_version():
    """Return ``libjt.jt_version()``, or a fixed placeholder when no library is loaded."""
    if libjt is not None:
        return libjt.jt_version()
    return "libjt version none"
-
def libjt_initialize(JT_DLL, **args):
    """Load the JTalk library, initialise the shared state and declare prototypes.

    JT_DLL -- path of the library; it is encoded as 'mbcs' before loading.
              The library is loaded only if it has not been loaded before.
    args   -- voice settings.  The keys 'use_lpf', 'samp_rate', 'fperiod'
              and 'alpha' are read (KeyError if one is missing); any other
              key is ignored.

    Besides initialising ``njd``, ``jpcommon`` and ``engine``, this assigns
    ctypes ``argtypes``/``restype`` for the functions used elsewhere in the
    module.
    """
    global libjt, use_lpf
    use_lpf = args['use_lpf']

    if libjt is None:
        libjt = cdll.LoadLibrary(JT_DLL.encode('mbcs'))
    libjt.jt_version.restype = c_char_p

    libjt.NJD_initialize.argtypes = [NJD_ptr]
    libjt.NJD_initialize(njd)

    libjt.JPCommon_initialize.argtypes = [JPCommon_ptr]
    libjt.JPCommon_initialize(jpcommon)

    libjt.HTS_Engine_initialize.argtypes = [HTS_Engine_ptr, c_int]
    # Second argument is 3 when use_lpf is set, otherwise 2.
    libjt.HTS_Engine_initialize(engine, 3 if use_lpf else 2)

    libjt.HTS_Engine_set_sampling_rate.argtypes = [HTS_Engine_ptr, c_int]
    libjt.HTS_Engine_set_sampling_rate(engine, args['samp_rate'])  # e.g. 16000

    libjt.HTS_Engine_set_fperiod.argtypes = [HTS_Engine_ptr, c_int]
    # If the sampling rate is 16000: 80 (point=5ms) frame period.
    libjt.HTS_Engine_set_fperiod(engine, args['fperiod'])

    libjt.HTS_Engine_set_alpha.argtypes = [HTS_Engine_ptr, c_double]
    libjt.HTS_Engine_set_alpha(engine, args['alpha'])  # e.g. 0.42

    libjt.HTS_Engine_set_gamma.argtypes = [HTS_Engine_ptr, c_int]
    libjt.HTS_Engine_set_gamma(engine, 0)

    libjt.HTS_Engine_set_log_gain.argtypes = [HTS_Engine_ptr, c_int]
    libjt.HTS_Engine_set_log_gain(engine, 0)

    libjt.HTS_Engine_set_beta.argtypes = [HTS_Engine_ptr, c_double]
    libjt.HTS_Engine_set_beta(engine, 0.0)

    libjt.HTS_Engine_set_audio_buff_size.argtypes = [HTS_Engine_ptr, c_int]
    libjt.HTS_Engine_set_audio_buff_size(engine, 1600)

    libjt.HTS_Engine_set_msd_threshold.argtypes = [HTS_Engine_ptr, c_int, c_double]
    libjt.HTS_Engine_set_msd_threshold(engine, 1, 0.5)

    libjt.HTS_Engine_set_gv_weight.argtypes = [HTS_Engine_ptr, c_int, c_double]
    libjt.HTS_Engine_set_gv_weight(engine, 0, 1.0)
    libjt.HTS_Engine_set_gv_weight(engine, 1, 0.7)
    if use_lpf:
        libjt.HTS_Engine_set_gv_weight(engine, 2, 1.0)

    # Prototypes of functions that take a single state pointer.  Each
    # function gets its own list; duplicates of the original were dropped.
    for name in (
        'njd_set_pronunciation', 'njd_set_digit', 'njd_set_accent_phrase',
        'njd_set_accent_type', 'njd_set_unvoiced_vowel', 'njd_set_long_vowel',
        'NJD_refresh', 'NJD_print', 'NJD_clear',
    ):
        getattr(libjt, name).argtypes = [NJD_ptr]
    for name in (
        'JPCommon_make_label', 'JPCommon_get_label_size',
        'JPCommon_get_label_feature', 'JPCommon_refresh',
        'JPCommon_print', 'JPCommon_clear',
    ):
        getattr(libjt, name).argtypes = [JPCommon_ptr]
    for name in (
        'HTS_Engine_create_sstream', 'HTS_Engine_create_pstream',
        'HTS_Engine_create_gstream', 'HTS_Engine_refresh',
        'HTS_Engine_clear', 'jt_total_nsample', 'jt_speech_ptr',
    ):
        getattr(libjt, name).argtypes = [HTS_Engine_ptr]
    for name in (
        'JPCommonLabel_print', 'JPCommonLabel_clear',
        'JPCommonLabel_initialize',
    ):
        getattr(libjt, name).argtypes = [JPCommonLabel_ptr]

    # for libjt_synthesis()
    libjt.mecab2njd.argtypes = [NJD_ptr, FEATURE_ptr_array_ptr, c_int]
    libjt.njd2jpcommon.argtypes = [JPCommon_ptr, NJD_ptr]
    libjt.JPCommon_get_label_feature.restype = c_char_p_p
    libjt.HTS_Engine_load_label_from_string_list.argtypes = [
        HTS_Engine_ptr, c_char_p_p, c_int]
    libjt.HTS_GStreamSet_get_total_nsample.argtypes = [HTS_GStreamSet_ptr]
    libjt.HTS_GStreamSet_get_speech.argtypes = [HTS_GStreamSet_ptr, c_int]

    libjt.jt_speech_ptr.restype = c_short_p
    libjt.jt_save_logs.argtypes = [c_char_p, HTS_Engine_ptr, NJD_ptr]
    libjt.jt_save_riff.argtypes = [c_char_p, HTS_Engine_ptr]
    libjt.jt_speech_normalize.argtypes = [HTS_Engine_ptr, c_short, c_int]
    libjt.jt_trim_silence.argtypes = [HTS_Engine_ptr, c_short, c_short]
    libjt.jt_trim_silence.restype = c_int

    libjt.HTS_Engine_set_lf0_offset_amp.argtypes = [HTS_Engine_ptr, c_double, c_double]

    # for libjt_jpcommon_make_label()
    libjt.JPCommonNode_get_pron.restype = c_char_p
    libjt.JPCommonNode_get_pos.restype = c_char_p
    libjt.JPCommonNode_get_ctype.restype = c_char_p
    libjt.JPCommonNode_get_cform.restype = c_char_p
    libjt.JPCommonNode_get_acc.restype = c_int
    libjt.JPCommonNode_get_chain_flag.restype = c_int
    # The attribute ctypes reads is "argtypes"; the former "argtype"
    # spelling was silently ignored.
    libjt.JPCommonLabel_push_word.argtypes = [
        JPCommonLabel_ptr, c_char_p, c_char_p, c_char_p, c_char_p, c_int, c_int]
-
def libjt_load(VOICE):
    """Load the voice files found in directory ``VOICE`` into ``engine``.

    The files are loaded in this order: duration (dur), mgc, lf0, lpf (only
    when ``use_lpf`` is set), gv-mgc, gv-lf0 and finally gv-switch.inf.
    ``VOICE`` is encoded as 'mbcs' before the paths are built.
    """
    voice_dir = VOICE.encode('mbcs')
    # Every buffer and pointer object is parked here so that it stays
    # referenced until the function returns, just as separate local
    # variables would keep it.
    keep = []

    def fn_ptr(name):
        # FILENAME_ptr view of a freshly allocated path string
        buf = create_string_buffer(os.path.join(voice_dir, name))
        ptr = cast(byref(buf), FILENAME_ptr)
        keep.extend((buf, ptr))
        return ptr

    def fn_ptr_ptr(name):
        # FILENAME_ptr_ptr pointing at the pointer made by fn_ptr()
        ptr = fn_ptr(name)
        ptr_ptr = cast(byref(ptr), FILENAME_ptr_ptr)
        keep.append(ptr_ptr)
        return ptr_ptr

    def window_set(first, second, third):
        # pointer to an array holding three FILENAME_ptr values
        triple = FILENAME_ptr_x3(first, second, third)
        triple_ptr = cast(byref(triple), FILENAME_ptr_x3_ptr)
        keep.extend((triple, triple_ptr))
        return triple_ptr

    libjt.HTS_Engine_load_duration_from_fn.argtypes = [
        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr, c_int]
    libjt.HTS_Engine_load_duration_from_fn(
        engine, fn_ptr_ptr("dur.pdf"), fn_ptr_ptr("tree-dur.inf"), 1)

    libjt.HTS_Engine_load_parameter_from_fn.argtypes = [
        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr,
        FILENAME_ptr_x3_ptr, c_int, c_int, c_int, c_int]

    libjt.HTS_Engine_load_parameter_from_fn(
        engine,
        fn_ptr_ptr("mgc.pdf"),
        fn_ptr_ptr("tree-mgc.inf"),
        window_set(fn_ptr("mgc.win1"), fn_ptr("mgc.win2"), fn_ptr("mgc.win3")),
        0, 0, 3, 1)

    libjt.HTS_Engine_load_parameter_from_fn(
        engine,
        fn_ptr_ptr("lf0.pdf"),
        fn_ptr_ptr("tree-lf0.inf"),
        window_set(fn_ptr("lf0.win1"), fn_ptr("lf0.win2"), fn_ptr("lf0.win3")),
        1, 1, 3, 1)

    if use_lpf:
        # Only one window file here; the other two slots are null pointers.
        libjt.HTS_Engine_load_parameter_from_fn(
            engine,
            fn_ptr_ptr("lpf.pdf"),
            fn_ptr_ptr("tree-lpf.inf"),
            window_set(fn_ptr("lpf.win1"),
                       cast(0, FILENAME_ptr),
                       cast(0, FILENAME_ptr)),
            2, 0, 1, 1)

    libjt.HTS_Engine_load_gv_from_fn.argtypes = [
        HTS_Engine_ptr, FILENAME_ptr_ptr, FILENAME_ptr_ptr,
        c_int, c_int]

    libjt.HTS_Engine_load_gv_from_fn(
        engine, fn_ptr_ptr("gv-mgc.pdf"), fn_ptr_ptr("tree-gv-mgc.inf"), 0, 1)

    libjt.HTS_Engine_load_gv_from_fn(
        engine, fn_ptr_ptr("gv-lf0.pdf"), fn_ptr_ptr("tree-gv-lf0.inf"), 1, 1)

    libjt.HTS_Engine_load_gv_switch_from_fn.argtypes = [
        HTS_Engine_ptr, FILENAME_ptr]

    libjt.HTS_Engine_load_gv_switch_from_fn(engine, fn_ptr("gv-switch.inf"))
-
def libjt_refresh():
    """Call the refresh routine for ``engine``, ``jpcommon`` and ``njd``, in that order."""
    for routine, state in (
        ("HTS_Engine_refresh", engine),
        ("JPCommon_refresh", jpcommon),
        ("NJD_refresh", njd),
    ):
        # Looked up one at a time so each lookup happens right before its call.
        getattr(libjt, routine)(state)
-
def libjt_clear():
    """Call the clear routine for ``njd``, ``jpcommon`` and ``engine``, in that order."""
    for routine, state in (
        ("NJD_clear", njd),
        ("JPCommon_clear", jpcommon),
        ("HTS_Engine_clear", engine),
    ):
        # Looked up one at a time so each lookup happens right before its call.
        getattr(libjt, routine)(state)
-
-#def libjt_jpcommon_make_label(jpcommon, logwrite_=None):
-# if jpcommon.label:
-# libjt.JPCommonLabel_clear(jpcommon.label)
-# else:
-# jpcommon.label = cast(mc_calloc(1, sizeof(JPCommonLabel)), JPCommonLabel_ptr)
-# libjt.JPCommonLabel_initialize(jpcommon.label)
-# node = jpcommon.head
-# while node:
-# label = jpcommon.label
-# pron = libjt.JPCommonNode_get_pron(node)
-# pos = libjt.JPCommonNode_get_pos(node)
-# ctype = libjt.JPCommonNode_get_ctype(node)
-# cform = libjt.JPCommonNode_get_cform(node)
-# acc = libjt.JPCommonNode_get_acc(node)
-# flag = libjt.JPCommonNode_get_chain_flag(node)
-# if logwrite_ : logwrite_('%s,%s,%d,%d' % (pron, pos, acc, flag))
-# libjt.JPCommonLabel_push_word(label, pron, pos, ctype, cform, acc, flag)
-# node = cast(node[0].next, JPCommonNode_ptr)
-# libjt.JPCommonLabel_make(jpcommon.label)
-
def libjt_synthesis(feature, size, fperiod_=80, feed_func_=None, is_speaking_func_=None, thres_=32, thres2_=32, level_=32767, logwrite_=None, lf0_offset_=0.0, lf0_amp_=1.0):
    """Synthesize speech from MeCab features and return the raw sample bytes.

    feature, size     -- passed to ``mecab2njd``; if either is None the
                         function returns None at once
    fperiod_          -- frame period given to the engine (80: point=5ms)
    feed_func_        -- optional callable that receives the sample bytes
    is_speaking_func_ -- optional callable; when it returns a false value
                         the state is refreshed and None is returned
    thres_, thres2_   -- passed to ``jt_trim_silence``
    level_            -- passed to ``jt_speech_normalize``
    logwrite_         -- optional callable for progress and error messages
    lf0_offset_, lf0_amp_ -- passed to ``HTS_Engine_set_lf0_offset_amp``

    Returns the generated samples as a byte string, or None when nothing
    was generated.  WindowsError raised by the library is logged and the
    function carries on, as a best effort.
    """
    if feature is None or size is None:
        return None

    def note(text):
        if logwrite_:
            logwrite_(text)

    def cancelled():
        return is_speaking_func_ and not is_speaking_func_()

    note('libjt_synthesis start.')
    try:
        libjt.HTS_Engine_set_lf0_offset_amp(engine, lf0_offset_, lf0_amp_)
        libjt.HTS_Engine_set_fperiod(engine, fperiod_)  # 80(point=5ms) frame period
        libjt.mecab2njd(njd, feature, size)
        libjt.njd_set_pronunciation(njd)
        libjt.njd_set_digit(njd)
        libjt.njd_set_accent_phrase(njd)
    except WindowsError:
        note('libjt_synthesis error #1 ')
    # exception: access violation reading 0x00000000
    # https://github.com/nishimotz/libopenjtalk/commit/10d3abda6835e0547846fb5e12a36c1425561aaa#diff-66
    try:
        libjt.njd_set_accent_type(njd)
    except WindowsError:
        note('libjt_synthesis njd_set_accent_type() error ')
    try:
        libjt.njd_set_unvoiced_vowel(njd)
        libjt.njd_set_long_vowel(njd)
        libjt.njd2jpcommon(jpcommon, njd)
        libjt.JPCommon_make_label(jpcommon)
    except WindowsError:
        note('libjt_synthesis error #2 ')
    if cancelled():
        libjt_refresh()
        return None
    # Bound up front: if the call below fails, the size check further down
    # must see "no labels" instead of raising NameError.
    s = 0
    try:
        s = libjt.JPCommon_get_label_size(jpcommon)
    except WindowsError:
        note('libjt_synthesis JPCommon_get_label_size() error ')
    buf = None
    if s > 2:
        try:
            f = libjt.JPCommon_get_label_feature(jpcommon)
            libjt.HTS_Engine_load_label_from_string_list(engine, f, s)
            libjt.HTS_Engine_create_sstream(engine)
            libjt.HTS_Engine_create_pstream(engine)
            libjt.HTS_Engine_create_gstream(engine)
        except WindowsError:
            note('libjt_synthesis error #3 ')
        if cancelled():
            libjt_refresh()
            return None
        try:
            total_nsample = libjt.jt_trim_silence(engine, thres_, thres2_)
            libjt.jt_speech_normalize(engine, level_, total_nsample)
            speech_ptr = libjt.jt_speech_ptr(engine)
            byte_count = total_nsample * sizeof(c_short)
            buf = string_at(speech_ptr, byte_count)
            if feed_func_:
                feed_func_(buf)
            #libjt.jt_save_logs("_logfile", engine, njd)
        except WindowsError:
            note('libjt_synthesis error #5 ')
    note('libjt_synthesis done.')
    return buf
+++ /dev/null
-# _nvdajp_espeak.py \r
-# -*- coding: utf-8 -*-\r
-\r
-from __future__ import unicode_literals\r
-import translator2\r
-from logHandler import log\r
-import re\r
-import copy\r
-from _nvdajp_unicode import unicode_normalize\r
-from speech import CharacterModeCommand\r
-\r
-_logwrite = log.debug\r
-\r
def isJapaneseLang(msg):
    """Return True if any character of ``msg`` is in U+3040-U+30FF or U+3100-U+9FFF."""
    return any(
        0x3040 <= ord(ch) <= 0x30ff or 0x3100 <= ord(ch) <= 0x9fff
        for ch in msg
    )
-\r
-kanadic = None\r
-\r
def load_kanadic():
    """Build the ordered katakana replacement table.

    Returns a list of ``[compiled_pattern, replacement]`` pairs.  The order
    matters to callers that apply the rules one after another: two-character
    combinations come before single characters, and the last two rules
    delete Braille patterns and any remaining Japanese/CJK characters.
    """
    rules = (
        ('キュ', 'cu'),
        ('キョ', 'co'),
        ('ギャ', 'ga'),
        ('ギュ', 'gu'),
        ('ギョ', 'go'),

        ('シャ', 'sha'),
        ('シュ', 'shu'),
        ('ショ', 'sho'),

        ('ジャ', 'jar'),
        ('ジュ', 'ju'),
        ('ジョ', 'jo'),
        ('ヂャ', 'jar'),
        ('ヂュ', 'ju'),
        ('ヂョ', 'jo'),
        ('ニャ', 'nyar'),
        ('ニュ', 'new'),
        ('ニョ', 'no'),

        ('ヒャ', 'hyar'),
        ('ヒュ', 'hu'),
        ('ヒョ', 'ho'),
        ('ビャ', 'bar'),
        ('ビュ', 'bu'),
        ('ビョ', 'bo'),
        ('ピャ', 'pyar'),
        ('ピュ', 'pew'),
        ('ピョ', 'po'),

        ('ミャ', 'ma'),
        ('ミュ', 'mu'),
        ('ミョ', 'mo'),
        ('リャ', 'ra'),
        ('リュ', 'ru'),
        ('リョ', 'ro'),

        ('イェ', 'yiay'),
        ('キェ', 'kyay'),
        ('シェ', 'shay'),
        ('チェ', 'chay'),
        ('ニェ', 'nyay'),
        ('ヒェ', 'hyay'),
        ('スィ', 'si'),
        ('ティ', 'tee'),
        ('ジェ', 'jay'),
        ('ズィ', 'zee'),
        ('ディ', 'di'),
        ('ウィ', 'we'),
        ('ウェ', 'way'),
        ('ウォ', 'wo'),
        ('トゥ', 'tu'),
        ('クァ', 'kwa'),
        ('クィ', 'kwee'),
        ('クェ', 'kway'),
        ('クォ', 'kwo'),
        ('ドゥ', 'du'),
        ('グァ', 'gwa'),
        ('グィ', 'gwee'),
        ('グェ', 'gway'),
        ('グォ', 'gwo'),
        ('テュ', 'tu'),
        ('フュ', 'fu'),
        ('フョ', 'fo'),
        ('ツァ', 'tsar'),
        ('ツィ', 'tsee'),
        ('ツェ', 'tsay'),
        ('ツォ', 'tso'),
        ('デュ', 'du'),
        ('ヴュ', 'vu'),
        ('ヴョ', 'vo'),
        ('ファ', 'far'),
        ('フィ', 'fi'),
        ('フェ', 'fe'),
        ('フォ', 'fo'),
        ('ヴァ', 'var'),
        ('ヴィ', 'vee'),
        ('ヴェ', 'vay'),
        ('ヴォ', 'vo'),
        ('ヴ', 'vu'),

        ('カ', 'ca'),
        ('キ', 'kee'),
        ('ク', 'cu'),
        ('ケ', 'kay'),
        ('コ', 'co'),

        ('ガ', 'ga'),
        ('ギ', 'gi'),
        ('グ', 'gu'),
        ('ゲ', 'gay'),
        ('ゴ', 'go'),

        ('サ', 'sa'),
        ('シ', 'shi'),
        ('ス', 'su'),
        ('セ', 'say'),
        ('ソ', 'so'),

        ('ザ', 'za'),
        ('ジ', 'zee'),
        ('ズ', 'zu'),
        ('ゼ', 'zay'),
        ('ゾ', 'zo'),

        ('タ', 'ta'),
        ('チ', 'chee'),
        ('ツ', 'tsu'),
        ('テ', 'tay'),
        ('ト', 'tau'),

        ('ダ', 'da'),
        ('ヂ', 'gee'),
        ('ヅ', 'zu'),
        ('デ', 'day'),
        ('ド', 'dau'),

        ('ナ', 'na'),
        ('ニ', 'nee'),
        ('ヌ', 'nu'),
        ('ネ', 'nay'),
        ('ノ', 'no'),

        ('ハ', 'ha'),
        ('ヒ', 'hee'),
        ('フ', 'fu'),
        ('ヘ', 'hay'),
        ('ホ', 'ho'),

        ('バ', 'ba'),
        ('ビ', 'bee'),
        ('ブ', 'boo'),
        ('ベ', 'bay'),
        ('ボ', 'bo'),

        ('パ', 'pa'),
        ('ピ', 'pee'),
        ('プ', 'pu'),
        ('ペ', 'pay'),
        ('ポ', 'po'),

        ('マ', 'ma'),
        ('ミ', 'mee'),
        ('ム', 'mu'),
        ('メ', 'may'),
        ('モ', 'mo'),

        ('ヤ', 'ya'),
        ('ユ', 'yu'),
        ('ヨ', 'yo'),

        ('ラ', 'la'),
        ('リ', ' lee'),
        ('ル', 'lu'),
        ('レ', ' lay'),
        ('ロ', ' low'),

        ('ワ', 'wa'),
        ('ヲ', 'wo'),

        ('ン', 'n '),

        ('ア', ' ah '),
        ('イ', ' ee '),
        ('ウ', ' u '),
        ('エ', ' a '),
        ('オ', ' o '),

        ('ァ', 'ah'),
        ('ィ', 'ee'),
        ('ゥ', 'u'),
        ('ェ', 'a'),
        ('ォ', 'o'),

        ('ャ', 'ya'),
        ('ュ', 'yu'),
        ('ョ', 'yo'),

        ('ッ', ' '),
        ('ー', ' '),

        # Braille
        ('[\u2800-\u28ff]+', ''),

        # Japanese & CJK
        ('[\u3040-\u9fff]+', ''),
    )
    return [[re.compile(source), reading] for source, reading in rules]
-\r
def replaceJapanese(msg):
    """Return ``msg`` after applying every rule of the kana table to it.

    ``msg`` is first passed through ``translator2.japanese_braille_separate``
    (initialising translator2 if needed) and element ``[0]`` of that result
    is kept.  The table from ``load_kanadic()`` is built on first use and
    cached in the module-level ``kanadic``.
    """
    global kanadic
    if not translator2.mecab_initialized:
        translator2.initialize()
    msg = translator2.japanese_braille_separate(msg, _logwrite)[0]

    if kanadic is None:
        kanadic = load_kanadic()
    for pattern, reading in kanadic:
        try:
            msg = pattern.sub(reading, msg)
        except Exception:
            # Still best effort -- the remaining rules are applied -- but
            # the failure is logged instead of vanishing.
            _logwrite("replaceJapanese: rule %r failed" % pattern.pattern)
    return msg
-\r
def replaceJapaneseFromSpeechSequence(speechSequence):
    """Return a new sequence in which Japanese text items are replaced.

    Text items are normalised with ``unicode_normalize``; those containing
    Japanese characters are then run through ``replaceJapanese``.  Character
    mode is not wanted for replaced text, so while character mode is on,
    each replaced item is wrapped in ``CharacterModeCommand(False)`` ...
    ``CharacterModeCommand(True)``.  All other items are copied unchanged.
    """
    result = []
    charmode = False
    for item in speechSequence:
        if isinstance(item, CharacterModeCommand):
            # Remember the mode now in force.  The state used to be stored
            # in an unused variable, so the wrapping below never triggered.
            charmode = item.state
            result.append(item)
            continue
        replaced = False
        if isinstance(item, basestring):
            item = unicode_normalize(item)
            if isJapaneseLang(item):
                item = replaceJapanese(item)
                replaced = True
        if replaced and charmode:
            result.append(CharacterModeCommand(False))
            result.append(item)
            result.append(CharacterModeCommand(True))
        else:
            result.append(item)
    return result
+++ /dev/null
-# _nvdajp_jtalk.py
-# -*- coding: utf-8 -*-
-#A part of NonVisual Desktop Access (NVDA)
-# speech engine nvdajp_jtalk
-# Copyright (C) 2010-2012 Takuya Nishimoto (nishimotz.com)
-# based on Open JTalk (bin/open_jtalk.c) http://github.com/nishimotz/libopenjtalk/
-
-from logHandler import log
-import time
-import Queue
-import os
-import codecs
-import re
-import string
-import ctypes
-import baseObject
-import copy
-import nvwave
-from .. import _espeak
-import _jtalk_core
-import _nvdajp_predic
-from _nvdajp_unicode import unicode_normalize
-import _bgthread
-import sys
-import time
-import watchdog
-import config
-
-jtalk_dir = unicode(os.path.dirname(__file__), 'mbcs')
-if hasattr(sys,'frozen'):
- d = os.path.join(os.getcwdu(), 'synthDrivers', 'jtalk')
- if os.path.isdir(d):
- jtalk_dir = d
-
-DEBUG = False
-
-RATE_BOOST_MULTIPLIER = 1.5
-
-# math.log(150) = 5.0, math.log(350) = 5.86
# Built-in voices.  Each entry is passed as keyword arguments to
# _jtalk_core.libjt_initialize(); "dir" is the voice's sub-directory.
# lf0_base reference points: math.log(150) = 5.0, math.log(350) = 5.86
_jtalk_voices = [
    dict(
        id="V1", name="m001", lang="ja",
        samp_rate=48000, fperiod=240, alpha=0.55,
        lf0_base=5.0, pitch_bias=0, use_lpf=1,
        speaker_attenuation=1.0,
        dir="m001", espeak_variant="max",
    ),
    dict(
        id="V2", name="mei", lang="ja",
        samp_rate=48000, fperiod=240, alpha=0.55,
        lf0_base=5.86, pitch_bias=-10, use_lpf=1,
        speaker_attenuation=0.5,
        dir="mei_normal", espeak_variant="f1",
    ),
    dict(
        id="V3", name="lite", lang="ja",
        samp_rate=16000, fperiod=80, alpha=0.42,
        lf0_base=5.0, pitch_bias=0, use_lpf=0,
        speaker_attenuation=1.0,
        dir="voice", espeak_variant="max",
    ),
]
# V2 is the default voice.
default_jtalk_voice = _jtalk_voices[1]
voice_args = None
-
class VoiceProperty(baseObject.AutoPropertyObject):
    """AutoPropertyObject subclass that adds nothing of its own."""

    def __init__(self):
        """Delegate straight to the base class initialiser."""
        super(VoiceProperty, self).__init__()
-
-# if samp_rate==16000: normal speed = 80samples period
# Frame period used outside character mode.
# (if samp_rate==16000: normal speed = 80 samples period)
fperiod = 240

# gain control
max_level = 32000
thres_level = thres2_level = 128
speaker_attenuation = 1.0

# Runtime state
logwrite = None
lastIndex = None
currIndex = None
player = None
# 0: idle, 1: espeak, 2: jtalk
currentEngine = 0
-
def isSpeaking():
    """Return the current value of ``_bgthread.isSpeaking``."""
    speaking = _bgthread.isSpeaking
    return speaking
-
def setSpeaking(b):
    """Store ``b`` in ``_bgthread.isSpeaking``."""
    setattr(_bgthread, "isSpeaking", b)
-
def _jtalk_speak(msg, index=None, prop=None):
    """Synthesize ``msg`` with JTalk and feed the audio to ``player``.

    msg   -- text; it is normalised, converted by ``_nvdajp_predic`` and
             then processed one whitespace-separated chunk at a time
    index -- stored in ``currIndex`` while speaking and copied to
             ``lastIndex`` when the message has been played
    prop  -- object providing ``characterMode``, ``pitch`` and
             ``inflection``; if None the function does nothing

    If speaking is switched off between stages, the native state is
    refreshed and the function returns immediately.
    """
    global currIndex, lastIndex, currentEngine
    if prop is None:
        return
    currIndex = index
    # Character mode uses the voice's own frame period.
    fperiod_current = voice_args['fperiod'] if prop.characterMode else fperiod
    msg = _nvdajp_predic.convert(unicode_normalize(msg))
    lw = logwrite if DEBUG else None
    setSpeaking(True)
    currentEngine = 2
    if DEBUG:
        logwrite("p:%d i:%d msg:%s" % (prop.pitch, prop.inflection, msg))
    la = 0.020 * prop.inflection  # 50 = original range
    ls = 0.015 * (prop.pitch - 50.0 + voice_args['pitch_bias'])  # 50 = no shift
    lo = ls + voice_args['lf0_base'] * (1 - la)
    if DEBUG:
        logwrite("lo:%f la:%f" % (lo, la))
    for chunk in msg.split():
        try:
            if DEBUG:
                logwrite("unicode (%s)" % chunk)
            s = _jtalk_core.Mecab_text2mecab(chunk)
            if DEBUG:
                logwrite("utf-8 (%s)" % s.decode('utf-8', 'ignore'))
            if not isSpeaking():
                _jtalk_core.libjt_refresh()
                return
            mf = _jtalk_core.MecabFeatures()
            _jtalk_core.Mecab_analysis(s, mf, logwrite_=logwrite)
            if DEBUG:
                _jtalk_core.Mecab_print(mf, logwrite)
            _jtalk_core.Mecab_correctFeatures(mf)
            if DEBUG:
                _jtalk_core.Mecab_print(mf, logwrite)
            _jtalk_core.Mecab_utf8_to_cp932(mf)
            if DEBUG:
                _jtalk_core.Mecab_print(mf, logwrite, CODE_='cp932')
                logwrite("Mecab_analysis done")
            if not isSpeaking():
                _jtalk_core.libjt_refresh()
                return
            # player.feed() is called from inside libjt_synthesis().
            _jtalk_core.libjt_synthesis(
                mf.feature, mf.size,
                fperiod_=fperiod_current,
                feed_func_=player.feed,
                is_speaking_func_=isSpeaking,
                thres_=thres_level,
                thres2_=thres2_level,
                level_=int(max_level * speaker_attenuation),
                logwrite_=lw,
                lf0_offset_=lo,
                lf0_amp_=la)
            mf = None
            if DEBUG:
                logwrite("libjt_synthesis done")
            _jtalk_core.libjt_refresh()
        except WindowsError:
            if DEBUG:
                logwrite("WindowsError")
    player.sync()
    lastIndex = currIndex
    currIndex = None
    setSpeaking(False)
    currentEngine = 0
-
-espeakMark = 10000
-
def _espeak_speak(msg, lang, index=None, prop=None):
    """Speak ``msg`` through eSpeak in language ``lang`` and wait for the end.

    The text is wrapped in a ``<voice>`` element followed by a ``<mark>``
    whose number is ``espeakMark``.  The function polls
    ``_espeak.lastIndex`` until that mark is reported or until
    ``currentEngine`` stops being 1, keeping the watchdog alive meanwhile.
    Afterwards ``lastIndex`` is set to ``index`` and the mark counter is
    incremented.  ``prop`` is accepted but unused.
    """
    global currentEngine, lastIndex, espeakMark
    currentEngine = 1
    msg = unicode(msg)
    # translate() returns a new string, so its result must be kept.  Drop
    # \x01 and escape the angle brackets so the text cannot be read as
    # markup once it is embedded below.
    msg = msg.translate({
        ord(u'\01'): None,
        ord(u'<'): u'&lt;',
        ord(u'>'): u'&gt;',
    })
    msg = u"<voice xml:lang=\"%s\">%s</voice>" % (lang, msg)
    msg += u"<mark name=\"%d\" />" % espeakMark
    _espeak.speak(msg)
    while currentEngine == 1 and _espeak.lastIndex != espeakMark:
        time.sleep(0.1)
        watchdog.alive()
    time.sleep(0.4)
    watchdog.alive()
    lastIndex = index
    currentEngine = 0
    espeakMark += 1
-
-# call from BgThread
def _speak(arg):
    """Dispatch one queued speech job to JTalk ('ja') or to eSpeak (any other language).

    ``arg`` is the sequence ``(msg, lang, index, prop)``.
    """
    msg, lang, index, prop = arg
    if DEBUG:
        logwrite('[' + lang + ']' + msg)
        logwrite("_speak(%s)" % msg)
    if lang != 'ja':
        _espeak_speak(msg, lang, index, prop)
        return
    _jtalk_speak(msg, index, prop)
-
def speak(msg, lang, index=None, voiceProperty_=None):
    """Queue ``msg`` for asynchronous speech on the background thread.

    Nothing is queued when the stripped message is empty or when no voice
    property object is supplied. The voice properties are deep-copied so later
    changes by the caller do not affect the queued utterance.
    """
    text = msg.strip()
    if not text or voiceProperty_ is None:
        return
    queued = [text, lang, index, copy.deepcopy(voiceProperty_)]
    _bgthread.execWhenDone(_speak, queued, mustBeAsync=True)
-
def stop():
    """Cancel the current utterance and drop all queued speech.

    When eSpeak is the active engine (``currentEngine == 1``) only eSpeak is
    stopped and the function returns. Otherwise every queued ``_speak`` task
    is removed from the background queue, other queued tasks (parameter
    changes) are put back, the wave player is stopped and ``lastIndex`` is
    reset to None.
    """
    # lastIndex must be declared global: without it the final assignment
    # below only binds a local and the module-level index is never cleared.
    global currentEngine, lastIndex
    if currentEngine == 1:
        _espeak.stop()
        currentEngine = 0
        return
    # Kill all speech from now.
    # We still want parameter changes to occur, so requeue them.
    params = []
    stop_task_count = 0 # only used for the debug log line below
    try:
        while True:
            item = _bgthread.bgQueue.get_nowait() # [func, args, kwargs]
            if item[0] != _speak:
                params.append(item)
            else:
                stop_task_count += 1
            _bgthread.bgQueue.task_done()
    except Queue.Empty:
        # Let the exception break us out of this loop, as queue.empty() is not reliable anyway.
        pass
    for item in params:
        _bgthread.bgQueue.put(item)
    setSpeaking(False)
    if DEBUG: logwrite("stop: %d task(s) stopping" % stop_task_count)
    player.stop()
    lastIndex = None
-
def pause(switch):
    """Forward a pause/resume request to whichever engine is active.

    Engine 1 is eSpeak and engine 2 is the wave player; any other value of
    ``currentEngine`` makes this a no-op.
    """
    if currentEngine == 1:
        _espeak.pause(switch)
        return
    if currentEngine == 2:
        player.pause(switch)
-
def initialize(voice = default_jtalk_voice):
    """Start eSpeak, the wave player, the background thread and Open JTalk.

    ``voice`` is a dict of voice settings; the keys read here are
    'espeak_variant', 'speaker_attenuation', 'samp_rate' and 'dir', and the
    whole dict is also passed as keyword arguments to libjt_initialize().

    If the voice directory does not exist an error is logged and the function
    returns early, before the voice, MeCab and the pre-dictionary are loaded.
    """
    global player, logwrite, voice_args
    global speaker_attenuation

    # eSpeak handles every non-Japanese language.
    _espeak.initialize()
    _espeak.setVoiceByLanguage("en")
    _espeak.setVoiceAndVariant(variant=voice["espeak_variant"])
    log.info("jtalk using eSpeak version %s" % _espeak.info())

    voice_args = voice
    speaker_attenuation = voice_args['speaker_attenuation']
    player = nvwave.WavePlayer(
        channels=1,
        samplesPerSec=voice_args['samp_rate'],
        bitsPerSample=16,
        outputDevice=config.conf["speech"]["outputDevice"])
    _bgthread.initialize()

    dll_path = os.path.join(jtalk_dir, 'libopenjtalk.dll')
    log.info('jt_dll %s' % dll_path)
    _jtalk_core.libjt_initialize(dll_path, **voice_args)

    voice_dir = os.path.join(jtalk_dir, voice_args['dir'])
    if not os.path.isdir(voice_dir):
        log.error('%s is not voice directory.' % voice_dir)
        return
    log.debug('loading %s' % voice_dir)
    _jtalk_core.libjt_load(voice_dir)

    logwrite = log.debug
    _jtalk_core.Mecab_initialize(logwrite)
    _nvdajp_predic.setup()
    try:
        logwrite(_jtalk_core.libjt_version())
    except:
        logwrite("libopenjtalk version unavailable")
    if DEBUG:
        logwrite("jtalk for NVDA started. voice:" + voice_args['dir'])
-
def terminate():
    # Shut the driver down: cancel speech, stop the background thread,
    # close and forget the wave player, then terminate eSpeak.
    global player
    stop()
    _bgthread.terminate()
    player.close()
    player = None
    _espeak.terminate()
-
def get_rate(rateBoost):
    """Convert the module-level frame period back into a rate value.

    When ``rateBoost`` is false the frame period is first divided by
    RATE_BOOST_MULTIPLIER (the inverse of what set_rate() applies). Only the
    16000 and 48000 sample rates are handled; any other rate yields 0.
    """
    f = fperiod if rateBoost else fperiod / RATE_BOOST_MULTIPLIER
    samp_rate = voice_args['samp_rate']
    if samp_rate == 16000:
        return int(160 - 2 * f)
    if samp_rate == 48000:
        return int((240 - f) / 1.5)
    return 0
-
def set_rate(rate, rateBoost):
    """Store ``rate`` in the module-level frame period ``fperiod``.

    The mapping depends on the voice sample rate (16000 or 48000); for any
    other sample rate ``fperiod`` keeps its previous value. When ``rateBoost``
    is false the result is then scaled by RATE_BOOST_MULTIPLIER.
    """
    global fperiod
    samp_rate = voice_args['samp_rate']
    if samp_rate == 16000:
        fperiod = int(80 - int(rate) / 2) # 80..30
    elif samp_rate == 48000:
        fperiod = int(240 - 1.5 * int(rate)) # 240..90
    if rateBoost:
        return
    fperiod = int(fperiod * RATE_BOOST_MULTIPLIER)
-
def set_volume(vol):
    """Derive the synthesis level globals from a volume value.

    ``max_level`` is a linear function of int(vol); both threshold levels are
    always reset to 128.
    """
    global max_level, thres_level, thres2_level
    max_level = int(326.67 * int(vol) + 100) # 100..32767
    thres_level = thres2_level = 128
-
+++ /dev/null
-# _nvdajp_predic.py
-# -*- coding: utf-8 -*-
-# A part of speech engine nvdajp_jtalk
-# Copyright (C) 2010-2011 Takuya Nishimoto (nishimotz.com)
-
-import re
-
-predic = None
-
-def setup():
- global predic
- if predic is None:
- predic = load()
-
def convert(msg):
    """Apply every rule in ``predic`` to ``msg`` in order, then lower-case it.

    A rule whose substitution raises is skipped, so the remaining rules still
    run (best effort).
    """
    converted = msg
    for rule in predic:
        try:
            converted = re.sub(rule[0], rule[1], converted)
        except:
            continue
    return converted.lower()
-
def load():
    """Return the ordered list of [compiled pattern, replacement] rules.

    The rules are applied one after another by convert(), so order matters:
    later rules see the output of earlier ones.
    """
    return [
        [re.compile(u'^ー$'), u'チョーオン'],
        [re.compile(u'^ン$'), u'ウン'],
        [re.compile(u'\\sー$'), u' チョーオン'],
        [re.compile(u'\\sン$'), u' ウン'],

        ## iteration mark: 人々 昔々 家々 山々
        [re.compile(u'(.)々'), u'\\1\\1'],

        ## isolated hiragana HA (mecab replaces to WA)
        [re.compile(u'^は$'), u'ハ'],
        [re.compile(u'\\sは$'), u' ハ'],

        ## 59 名
        [re.compile(u'(\\d) 名'), u'\\1名'],
        ## 4行 ヨンコー -> ヨンギョー
        [re.compile(u'(\\d)行'), u'\\1ギョー'],
        ## 2 分前更新
        ## The whole number is captured; a repeated one-digit group would
        ## keep only the last digit ("12 分前更新" -> "2分...").
        [re.compile(u'(\\d+) 分前更新'), u'\\1分マエコーシン'],

        ## 1MB 10MB 1.2MB 0.5MB 321.0MB 123.45MB 2.7GB
        ## 1 MB 10 MB 1.2 MB 0.5 MB 321.0 MB 123.45 MB 2.7 GB
        [re.compile(u'(\\d+)\\s*KB'), u'\\1キロバイト'],
        [re.compile(u'(\\d+)\\s*MB'), u'\\1メガバイト'],
        [re.compile(u'(\\d+)\\s*GB'), u'\\1ギガバイト'],
        [re.compile(u'(\\d+)\\s*MHz'), u'\\1メガヘルツ'],
        [re.compile(u'(\\d+)\\s*GHz'), u'\\1ギガヘルツ'],

        ## 2013 年 1 月 2 日
        [re.compile(u'(\\d+)\\s+年\\s+(\\d+)\\s+月\\s+(\\d+)\\s+日'), u'\\1年\\2月\\3日'],

        ### zenkaku (full-width) symbols convert
        ## 2011.03.11 / 1,234円 written with full-width punctuation.
        ## Written as escapes on purpose: a bare '.' pattern matches ANY
        ## character and would turn the whole message into dots.
        [re.compile(u'\uff0e'), u'.'],  # FULLWIDTH FULL STOP
        [re.compile(u'\uff0c'), u','],  # FULLWIDTH COMMA

        ## 1,234
        ## 1,234,567
        ## 1,234,567,890
        ## 1,23 = ichi comma niju san
        ## 1,0 = ichi comma zero
        [re.compile(u'(\\d)\\,(\\d{3})'), u'\\1\\2'],
        [re.compile(u'(\\d{2})\\,(\\d{3})'), u'\\1\\2'],
        [re.compile(u'(\\d{3})\\,(\\d{3})'), u'\\1\\2'],
        [re.compile(u'(\\d)\\,(\\d{1,2})'), u'\\1カンマ\\2'],

        [re.compile(u'(\\d{1,4})\\.(\\d{1,4})\\.(\\d{1,4})\\.(\\d{1,4})'), u'\\1テン\\2テン\\3テン\\4'],
        [re.compile(u'(\\d{1,4})\\.(\\d{1,4})\\.(\\d{1,4})'), u'\\1テン\\2テン\\3'],

        # do not replace '0' after '.' to phonetic symbols (prepare)
        [re.compile(u'\\.0'), u'.00'],

        # Numbers with a leading zero are spelled digit by digit: each digit
        # becomes a two-character marker that the rules below turn into kana.
        # NOTE(review): the shorter patterns can re-match the markers produced
        # by the longer ones; left unchanged - confirm the marker characters.
        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 0\\8 0\\9 '],
        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 0\\8 '],
        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 0\\7 '],
        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 0\\6 '],
        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 0\\5 '],
        [re.compile(u'\\b0(\\d)(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 0\\4 '],
        [re.compile(u'\\b0(\\d)(\\d)(\\d)'), u' 00 0\\1 0\\2 0\\3 '],
        [re.compile(u'\\b0(\\d)(\\d)'), u' 00 0\\1 0\\2 '],
        [re.compile(u'\\b0(\\d)'), u' 00 0\\1 '],

        [re.compile(u' 00'), u'ゼロ'],
        [re.compile(u' 01'), u'イチ'],
        [re.compile(u' 02'), u'ニー'],
        [re.compile(u' 03'), u'サン'],
        [re.compile(u' 04'), u'ヨン'],
        [re.compile(u' 05'), u'ゴー'],
        [re.compile(u' 06'), u'ロク'],
        [re.compile(u' 07'), u'ナナ'],
        [re.compile(u' 08'), u'ハチ'],
        [re.compile(u' 09'), u'キュー'],

        # do not replace '0' after '.' to phonetic symbols (finalize)
        [re.compile(u'\\.00'), u'.0'],
    ]
-
+++ /dev/null
-# _nvdajp_spellchar.py
-# -*- coding: utf-8 -*-
-#A part of NonVisual Desktop Access (NVDA)
-#Copyright (C) 2010-2011 Takuya Nishimoto (nishimotz.com)
-#This file is covered by the GNU General Public License.
-#See the file COPYING for more details.
-
-# workaround for msspeech Haruka with "Use spelling functionality"
-
-import re
# Rule table used by convert(); built once by init().
_dic = None

def init():
    """Build the spelling table ``_dic`` (a list of [pattern, replacement]).

    The rules run in order:
      1. full-width (zenkaku) A-Z, a-z and 0-9 -> their ASCII forms,
      2. ASCII digits -> katakana reading followed by a space,
      3. ASCII letters (either case) -> katakana reading followed by a space.

    The full-width characters are generated from code points (ASCII + 0xFEE0)
    so the table cannot degrade into "replace A with A" no-ops when the file
    is re-encoded or normalized.
    """
    global _dic
    if _dic : return

    table = []

    ### zenkaku alphabet / numbers convert
    for first, count in ((u'A', 26), (u'a', 26), (u'0', 10)):
        base = ord(first)
        for offset in range(count):
            half = u'%c' % (base + offset)
            full = u'%c' % (base + offset + 0xFEE0)
            table.append([re.compile(full), half])

    digit_readings = [
        u'ゼロ ', u'イチ ', u'ニイ ', u'サン ', u'ヨン ',
        u'ゴオ ', u'ロク ', u'ナナ ', u'ハチ ', u'キュウ ',
    ]
    for digit, reading in enumerate(digit_readings):
        table.append([re.compile(u'%d' % digit), reading])

    letter_readings = [
        (u'a', u'エイ '),
        (u'b', u'ビイー '),
        (u'c', u'シイ '),
        (u'd', u'ディイ '),
        (u'e', u'イイー '),
        (u'f', u'エフ '),
        (u'g', u'ジイ '),
        (u'h', u'エイチ '),
        (u'i', u'アイ '),
        (u'j', u'ジェイ '),
        (u'k', u'ケイ '),
        (u'l', u'エル '),
        (u'm', u'エム '),
        (u'n', u'エヌ '),
        (u'o', u'オオ '),
        (u'p', u'ピイイ '),
        (u'q', u'キュウ '),
        (u'r', u'アール '),
        (u's', u'エス '),
        (u't', u'ティイ '),
        (u'u', u'ユウ '),
        (u'v', u'ブイ '),
        (u'w', u'ダブリュウ '),
        (u'x', u'エックス '),
        (u'y', u'ワイ '),
        (u'z', u'ゼッド '),
    ]
    for letter, reading in letter_readings:
        # Same pattern text as before, e.g. u'(a|A)'.
        table.append([re.compile(u'(%s|%s)' % (letter, letter.upper())), reading])

    _dic = table
-
def convert(msg):
    """Spell out ``msg`` by applying every rule in ``_dic`` in order.

    The table is built on first use. A rule whose substitution raises is
    skipped (best effort).
    """
    if _dic is None:
        init()
    spelled = msg
    for rule in _dic:
        try:
            spelled = re.sub(rule[0], rule[1], spelled)
        except:
            continue
    return spelled
+++ /dev/null
-# _nvdajp_unicode.py \r
-# -*- coding: utf-8 -*-\r
-\r
-from __future__ import unicode_literals\r
-import unicodedata\r
-\r
def unicode_normalize(s):
    """Return ``s`` cleaned up and NFKC-normalized.

    Steps, in order: drop U+FFFD, U+200E and U+200F; NFKC-normalize; then map
    MINUS SIGN to '-', YEN SIGN to a backslash and WAVE DASH to '~'.
    """
    # REPLACEMENT CHARACTER, LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
    for unwanted in ('\ufffd', '\u200e', '\u200f'):
        s = s.replace(unwanted, '')
    # Original note: a character that Mecab_text2mecab() converts to
    # full-width and that NFKC cannot convert back.
    # NOTE(review): as written both arguments are the same character, so this
    # call changes nothing - confirm which character was intended.
    s = s.replace('.', '.')
    s = unicodedata.normalize('NFKC', s)
    after_nfkc = (
        ('\u2212', '-'),   # 0x2212 MINUS SIGN to 0x002D HYPHEN-MINUS
        ('\u00a5', '\\'),  # 0x00A5 YEN SIGN
        ('\u301c', '~'),   # 0x301C WAVE DASH
    )
    for old, new in after_nfkc:
        s = s.replace(old, new)
    return s
-\r
+++ /dev/null
-# coding: UTF-8\r
-#nvdajptext/mecab.py \r
-#A part of NonVisual Desktop Access (NVDA)\r
-#This file is covered by the GNU General Public License.\r
-#See the file COPYING for more details.\r
-#Copyright (C) 2010-2012 Masataka.Shinke, Takuya Nishimoto\r
-\r
-CODE = 'utf-8'\r
-\r
-from ctypes import *\r
-import codecs\r
-import re\r
-import string\r
-import os\r
-import struct\r
-import unicodedata\r
-import threading\r
-import sys\r
-\r
# Default directory used by Mecab_initialize() to find libmecab.dll, the
# dictionary ('dic') and 'mecabrc': the directory containing this module.
DEFAULT_JTALK_DIR = unicode(os.path.dirname(__file__), 'mbcs')
# When running frozen (sys.frozen is set), prefer
# <current dir>/synthDrivers/jtalk if that directory exists.
if hasattr(sys,'frozen'):
    d = os.path.join(os.getcwdu(), 'synthDrivers', 'jtalk')
    if os.path.isdir(d):
        DEFAULT_JTALK_DIR = d

# ctypes pointer type aliases.
c_double_p = POINTER(c_double)
c_double_p_p = POINTER(c_double_p)
c_short_p = POINTER(c_short)
c_char_p_p = POINTER(c_char_p)
-\r
-##############################################\r
-\r
-# http://mecab.sourceforge.net/libmecab.html\r
-# c:/mecab/sdk/mecab.h\r
# Values of mecab_node_t.stat. Mecab_analysis() skips BOS and EOS nodes.
MECAB_NOR_NODE = 0
MECAB_UNK_NODE = 1
MECAB_BOS_NODE = 2
MECAB_EOS_NODE = 3
# Opaque structure: only ever used through a pointer.
class mecab_token_t(Structure):
    pass
mecab_token_t_ptr = POINTER(mecab_token_t)

# Opaque structure: only ever used through a pointer.
class mecab_path_t(Structure):
    pass
mecab_path_t_ptr = POINTER(mecab_path_t)

# Declared empty first so the structure can hold pointers to itself;
# the fields are attached below once the pointer type exists.
class mecab_node_t(Structure):
    pass
mecab_node_t_ptr = POINTER(mecab_node_t)
mecab_node_t_ptr_ptr = POINTER(mecab_node_t_ptr)
# NOTE(review): the field order and the commented-out entries presumably
# mirror the mecab.h of the bundled libmecab.dll - confirm against that
# header before changing anything, since the layout must match exactly.
mecab_node_t._fields_ = [
    ("prev", mecab_node_t_ptr),
    ("next", mecab_node_t_ptr),
    ("enext", mecab_node_t_ptr),
    ("bnext", mecab_node_t_ptr),
    ("rpath", mecab_path_t_ptr),
    ("lpath", mecab_path_t_ptr),
    # ("begin_node_list", mecab_node_t_ptr_ptr),
    # ("end_node_list", mecab_node_t_ptr_ptr),
    ("surface", c_char_p),
    ("feature", c_char_p),
    ("id", c_uint),
    ("length", c_ushort),
    ("rlength", c_ushort),
    ("rcAttr", c_ushort),
    ("lcAttr", c_ushort),
    ("posid", c_ushort),
    ("char_type", c_ubyte),
    ("stat", c_ubyte),
    ("isbest", c_ubyte),
    # ("sentence_length", c_uint),
    ("alpha", c_float),
    ("beta", c_float),
    ("prob", c_float),
    ("wcost", c_short),
    ("cost", c_long),
    # ("token", mecab_token_t_ptr),
    ]
-\r
-############################################\r
-\r
-# typedef struct _Mecab{\r
-# char **feature;\r
-# int size;\r
-# mecab_t *mecab;\r
-# } Mecab;\r
-\r
FELEN = 1000 # size in bytes of each feature string buffer
FECOUNT = 1000 # number of feature buffers per features object
FEATURE = c_char * FELEN
FEATURE_ptr = POINTER(FEATURE)
FEATURE_ptr_array = FEATURE_ptr * FECOUNT
FEATURE_ptr_array_ptr = POINTER(FEATURE_ptr_array)

# Set by Mecab_initialize(): the mecab handle and the loaded libmecab DLL.
mecab = None
libmc = None
# Acquired by MecabFeatures.__init__ and released by MecabFeatures.__del__.
lock = threading.Lock()

# C runtime allocator from msvcrt, used for the feature buffers.
mc_malloc = cdll.msvcrt.malloc
mc_malloc.restype = POINTER(c_ubyte)
mc_calloc = cdll.msvcrt.calloc
mc_calloc.restype = POINTER(c_ubyte)
mc_free = cdll.msvcrt.free
-\r
class NonblockingMecabFeatures(object):
    """Holder for up to FECOUNT feature strings of FELEN bytes each.

    ``feature`` is a ctypes array of pointers to malloc'ed buffers and
    ``size`` is the number of entries currently in use. "Nonblocking" means
    that, unlike MecabFeatures, creating one does not take the module lock.
    """

    def __init__(self):
        self.size = 0
        self.feature = FEATURE_ptr_array()
        for i in xrange(0, FECOUNT):
            buf = mc_malloc(FELEN)
            if not buf:
                # malloc returned NULL; fail here rather than letting a later
                # memmove() write through a null pointer. Buffers allocated so
                # far are released by __del__.
                raise MemoryError('cannot allocate mecab feature buffer')
            self.feature[i] = cast(buf, FEATURE_ptr)

    def __del__(self):
        # Free every buffer. Errors are ignored per entry because this can
        # run on a partly constructed object or during interpreter shutdown.
        for i in xrange(0, FECOUNT):
            try:
                mc_free(self.feature[i])
            except Exception:
                pass
-\r
class MecabFeatures(NonblockingMecabFeatures):
    """Feature holder that also owns the module-level ``lock``.

    The lock is acquired before the buffers are allocated and released only
    when the object is finalized, so the lock is held for the object's
    lifetime.
    """

    def __init__(self):
        lock.acquire()
        super(MecabFeatures, self).__init__()

    def __del__(self):
        # Free the buffers first, then let the next user in.
        super(MecabFeatures, self).__del__()
        lock.release()
-\r
# Rule table used by text2mecab_convert(); built once by text2mecab_setup().
predic = None

def text2mecab_setup():
    """Build ``predic``: one [pattern, replacement] rule per ASCII character.

    Every printable ASCII character (space through '~', in code-point order)
    is mapped to a full-width counterpart. Most counterparts sit exactly
    0xFEE0 above the ASCII code point; the exceptions listed in ``special``
    keep the replacements the hand-written table used. Generating the table
    avoids entries that map a character to itself, which make the
    conversion a no-op.
    """
    global predic
    if predic is not None:
        return
    special = {
        u' ': u'\u3000',   # IDEOGRAPHIC SPACE
        u'"': u'\u201d',   # RIGHT DOUBLE QUOTATION MARK
        u"'": u'\u2019',   # RIGHT SINGLE QUOTATION MARK
        u'-': u'\u2212',   # MINUS SIGN
        # NOTE(review): yen sign kept exactly as it appeared in the old
        # table - confirm whether the dictionary expects the full-width form.
        u'\\': u'¥',
        u'`': u'\u2018',   # LEFT SINGLE QUOTATION MARK
        u'~': u'\u301c',   # WAVE DASH
    }
    table = []
    for code in range(0x20, 0x7f):
        half = u'%c' % code
        full = special.get(half, u'%c' % (code + 0xFEE0))
        # re.escape() so regex metacharacters such as '.', '(' or '\\'
        # match literally.
        table.append([re.compile(re.escape(half)), full])
    predic = table
-\r
def text2mecab_convert(s):
    """Apply every rule in ``predic`` to ``s`` in order and return the result.

    A rule whose substitution raises is skipped (best effort).
    """
    converted = s
    for rule in predic:
        try:
            converted = re.sub(rule[0], rule[1], converted)
        except:
            continue
    return converted
-\r
def Mecab_text2mecab(txt, CODE_=CODE):
    """Prepare ``txt`` for MeCab and return it encoded as ``CODE_`` bytes.

    The text is NFKC-normalized, run through the text2mecab rule table and
    encoded; characters that cannot be encoded are dropped.
    """
    text2mecab_setup()
    normalized = unicodedata.normalize('NFKC', txt)
    return text2mecab_convert(normalized).encode(CODE_, 'ignore')
-\r
def Mecab_initialize(logwrite_ = None, jtalk_dir = DEFAULT_JTALK_DIR):
    """Load libmecab.dll and create the shared mecab handle (both only once).

    logwrite_ -- optional callable taking one string, used for logging.
    jtalk_dir -- directory containing 'libmecab.dll', 'dic' and 'mecabrc'.

    Raises RuntimeError when the DIC_VERSION file in the dictionary directory
    does not mention the expected encoding (CODE).
    """
    mecab_dll = os.path.join(jtalk_dir, 'libmecab.dll')
    global libmc
    if libmc is None:
        libmc = cdll.LoadLibrary(mecab_dll.encode('mbcs'))
        libmc.mecab_version.restype = c_char_p
        libmc.mecab_strerror.restype = c_char_p
        libmc.mecab_sparse_tonode.restype = mecab_node_t_ptr
        libmc.mecab_new.argtypes = [c_int, c_char_p_p]
    global mecab
    if mecab is None:
        dic = os.path.join(jtalk_dir, 'dic')
        if logwrite_: logwrite_('dic: %s' % dic)
        # 'with' closes the file even if reading fails.
        with open(os.path.join(dic, "DIC_VERSION")) as f:
            s = f.read().strip()
        # logwrite_ defaults to None, so it has to be guarded here as well.
        if logwrite_: logwrite_('mecab:' + libmc.mecab_version() + ' ' + s)
        # check utf-8 dictionary
        if not CODE in s:
            raise RuntimeError('utf-8 dictionary for mecab required.')
        mecabrc = os.path.join(jtalk_dir, 'mecabrc')
        args = (c_char_p * 5)('mecab', '-d', dic.encode('utf-8'), '-r', mecabrc.encode('utf-8'))
        mecab = libmc.mecab_new(5, args)
        if logwrite_:
            if not mecab: logwrite_('mecab_new failed.')
            logwrite_(libmc.mecab_strerror(mecab))
-\r
def Mecab_analysis(src, features, logwrite_ = None):
    """Run MeCab on ``src`` and store one feature string per node.

    src       -- encoded byte string handed to mecab_sparse_tonode().
    features  -- object with a ``feature`` buffer array and a ``size``
                 attribute (for example MecabFeatures); filled in place.
    logwrite_ -- optional callable taking one string, used for logging.

    Each stored entry is "<surface>,<feature>". BOS/EOS nodes are skipped.
    At most FECOUNT entries are stored and each entry is cut to fit its
    FELEN-byte buffer.
    """
    features.size = 0
    if not src:
        if logwrite_: logwrite_('src empty')
        return
    head = libmc.mecab_sparse_tonode(mecab, src)
    # With a pointer restype ctypes returns a (false) NULL pointer object,
    # never None, so test truthiness.
    if not head:
        if logwrite_: logwrite_('mecab_sparse_tonode result empty')
        return

    # make array of features
    node = head
    i = 0
    while node:
        stat = node[0].stat
        if stat != MECAB_BOS_NODE and stat != MECAB_EOS_NODE:
            # Check the capacity before writing: feature[] has exactly
            # FECOUNT slots.
            if i >= FECOUNT:
                if logwrite_: logwrite_('too many nodes')
                break
            s = string_at(node[0].surface, node[0].length) + "," + string_at(node[0].feature)
            if logwrite_: logwrite_(s.decode(CODE, 'ignore'))
            # Leave room for the terminating NUL inside the FELEN-byte buffer.
            if len(s) >= FELEN:
                s = s[:FELEN - 1]
            buf = create_string_buffer(s)
            memmove(features.feature[i], byref(buf), len(s) + 1)
            i += 1
        node = node[0].next
    features.size = i
-\r
-# for debug\r
def Mecab_print(mf, logwrite_ = None, CODE_ = CODE, output_header = True):
    """Debug helper: send the contents of ``mf`` to ``logwrite_`` in one call.

    Does nothing without a logger. Each stored feature is written as
    "<index> <text>"; the text is decoded with ``CODE_`` unless ``CODE_`` is
    None. Empty entries are written as "[None]".
    """
    if logwrite_ is None:
        return
    feature = mf.feature
    size = mf.size
    if feature is None or size is None:
        if output_header:
            logwrite_( "Mecab_print size: 0" )
        return
    pieces = []
    if output_header:
        pieces.append("Mecab_print size: %d\n" % size)
    for i in xrange(0, size):
        raw = string_at(feature[i])
        if not raw:
            pieces.append("[None]\n")
        elif CODE_ is None:
            pieces.append("%d %s\n" % (i, raw))
        else:
            pieces.append("%d %s\n" % (i, raw.decode(CODE_, 'ignore')))
    logwrite_(''.join(pieces))
-\r
def Mecab_getFeature(mf, pos, CODE_ = CODE):
    """Return feature number ``pos`` of ``mf`` decoded with ``CODE_``.

    Bytes that cannot be decoded are dropped.
    """
    return string_at(mf.feature[pos]).decode(CODE_, 'ignore')
-\r
def Mecab_setFeature(mf, pos, s, CODE_ = CODE):
    """Encode ``s`` with ``CODE_`` and copy it into feature buffer ``pos``.

    Characters that cannot be encoded are dropped. The encoded text is cut to
    FELEN - 1 bytes so that, with the terminating NUL, the copy never runs
    past the FELEN-byte buffer.
    """
    encoded = s.encode(CODE_, 'ignore')
    if len(encoded) >= FELEN:
        encoded = encoded[:FELEN - 1]
    buf = create_string_buffer(encoded)
    memmove(mf.feature[pos], byref(buf), len(encoded) + 1)
-\r
def getMoraCount(s):
    """Return the mora count from an "accent/mora" field.

    "1/3" gives 3 and "*/*" gives 0. Anything that does not split into
    exactly two parts on '/' also gives 0.
    """
    parts = s.split('/')
    if len(parts) != 2 or parts[1] == '*':
        return 0
    return int(parts[1])
-\r
-# PATTERN 1\r
-# before:\r
-# 1 五絡脈病証,名詞,数,*,*,*,*,*\r
-#\r
-# after:\r
-# 1 五絡脈病証,名詞,普通名詞,*,*,*,*,五絡脈病証,ゴミャクラクビョウショウ,\r
-# ゴミャクラクビョーショー,1/9,C0\r
-# \r
-# PATTERN 2\r
-# before:\r
-# 0 ∫⣿♪ ,名詞,サ変接続,*,*,*,*,*\r
-#\r
-# after:\r
-# 0 ∫⣿♪ ,名詞,サ変接続,*,*,*,*,∫♪ ,セキブンキゴーイチニーサンヨンゴーロクナナ\r
-# ハチノテンオンプ,セキブンキゴーイチニーサンヨンゴーロクナナハチノテンオンプ,1/29,C0\r
-# \r
-# PATTERN 3\r
-# before:\r
-# 0 ま,接頭詞,名詞接続,*,*,*,*,ま,マ,マ,1/1,P2\r
-# 1 ー,名詞,一般,*,*,*,*,*\r
-#\r
-# after:\r
-# 0 ま,接頭詞,名詞接続,*,*,*,*,まー,マー,マー,1/2,P2\r
-# 1 ー,名詞,一般,*,*,*,*,*\r
def Mecab_correctFeatures(mf, CODE_ = CODE):
    # Rewrite, in place, features of ``mf`` that have no reading.
    # Fields are addressed by position after splitting on ',':
    #   0 surface, 1-2 part of speech, 7 base form, 8 reading,
    #   9 pronunciation, 10 "accent/mora".
    for pos in xrange(0, mf.size):
        ar = Mecab_getFeature(mf, pos, CODE_=CODE_).split(',')
        need_fix = False
        # PATTERN 1: numeral entry whose field 7 is '*'.
        if ar[2] == u'数' and ar[7] == u'*':
            need_fix = True
        # PATTERN 2: verbal-noun entry whose field 7 is '*'.
        if ar[1] == u'名詞' and ar[2] == u'サ変接続' and ar[7] == u'*':
            need_fix = True
        if need_fix:
            hyoki = ar[0]
            yomi = ''
            pron = ''
            mora = 0
            # Analyse the surface one character at a time and concatenate the
            # readings. The non-locking holder is used here; ``mf`` itself may
            # be a MecabFeatures that already holds the module lock.
            nbmf = NonblockingMecabFeatures()
            for c in hyoki:
                Mecab_analysis(Mecab_text2mecab(c, CODE_=CODE_), nbmf)
                for pos2 in xrange(0, nbmf.size):
                    ar2 = Mecab_getFeature(nbmf, pos2, CODE_=CODE_).split(',')
                    # Only entries that carry reading fields contribute.
                    if len(ar2) > 10:
                        yomi += ar2[8]
                        pron += ar2[9]
                        mora += getMoraCount(ar2[10])
            nbmf = None
            feature = u'{h},名詞,普通名詞,*,*,*,*,{h},{y},{p},1/{m},C0'.format(h=hyoki, y=yomi, p=pron, m=mora)
            Mecab_setFeature(mf, pos, feature, CODE_=CODE_)
        # PATTERN 3: a stand-alone long-vowel mark is appended to an earlier entry.
        elif pos > 0 and ar[0] == u'ー' and ar[1] == u'名詞' and ar[2] == u'一般':
            ar2 = Mecab_getFeature(mf, pos-1, CODE_=CODE_).split(',')
            if len(ar2) > 10:
                # The previous entry has a reading: extend it by one mora.
                hyoki = ar2[0] + u'ー'
                hin1 = ar2[1]
                hin2 = ar2[2]
                yomi = ar2[8] + u'ー'
                pron = ar2[9] + u'ー'
                mora = getMoraCount(ar2[10]) + 1
                feature = u'{h},{h1},{h2},*,*,*,*,{h},{y},{p},1/{m},C0'.format(h=hyoki, h1=hin1, h2=hin2, y=yomi, p=pron, m=mora)
                Mecab_setFeature(mf, pos-1, feature, CODE_=CODE_)
            elif pos >= 2:
                # The previous entry has no reading: fold its surface and the
                # mark into the entry two positions back, unless that one is
                # a symbol.
                ar3 = Mecab_getFeature(mf, pos-2, CODE_=CODE_).split(',')
                if len(ar3) > 10 and ar3[1] != u'記号':
                    hyoki = ar3[0] + ar2[0] + u'ー'
                    hin1 = ar3[1]
                    hin2 = ar3[2]
                    yomi = ar3[8] + ar2[0] + u'ー'
                    pron = ar3[9] + ar2[0] + u'ー'
                    # One mora per character of the unread surface, plus the mark.
                    mora = getMoraCount(ar3[10]) + len(ar2[0]) + 1
                    feature = u'{h},{h1},{h2},*,*,*,*,{h},{y},{p},1/{m},C0'.format(h=hyoki, h1=hin1, h2=hin2, y=yomi, p=pron, m=mora)
                    Mecab_setFeature(mf, pos-2, feature, CODE_=CODE_)
-\r
def Mecab_utf8_to_cp932(mf):
    """Re-encode every stored feature of ``mf`` from UTF-8 to cp932 in place."""
    for pos in xrange(0, mf.size):
        text = Mecab_getFeature(mf, pos, CODE_ = 'utf-8')
        Mecab_setFeature(mf, pos, text, CODE_ = 'cp932')
+++ /dev/null
-# coding: UTF-8\r
-#translator1.py (Japanese Braille translator Phase 1)\r
-#A part of NonVisual Desktop Access (NVDA)\r
-#Copyright (C) 2012 Masataka.Shinke, Takuya Nishimoto\r
-#Copyright (C) 2013 Takuya Nishimoto (NVDA Japanese Team)\r
-#This file is covered by the GNU General Public License.\r
-#See the file COPYING for more details.\r
-\r
-from __future__ import unicode_literals\r
-import unicodedata\r
-import re\r
-\r
# Single katakana character -> braille cell(s).
# Looked up by translateWithInPos() after the two-character table kana2_dic.
kana1_dic = {
    'ア':'⠁',
    'イ':'⠃',
    'ウ':'⠉',
    'エ':'⠋',
    'オ':'⠊',
    'カ':'⠡',
    'キ':'⠣',
    'ク':'⠩',
    'ケ':'⠫',
    'コ':'⠪',
    'サ':'⠱',
    'シ':'⠳',
    'ス':'⠹',
    'セ':'⠻',
    'ソ':'⠺',
    'タ':'⠕',
    'チ':'⠗',
    'ツ':'⠝',
    'テ':'⠟',
    'ト':'⠞',
    'ナ':'⠅',
    'ニ':'⠇',
    'ヌ':'⠍',
    'ネ':'⠏',
    'ノ':'⠎',
    'ハ':'⠥',
    'ヒ':'⠧',
    'フ':'⠭',
    'ヘ':'⠯',
    'ホ':'⠮',
    'マ':'⠵',
    'ミ':'⠷',
    'ム':'⠽',
    'メ':'⠿',
    'モ':'⠾',
    'ヤ':'⠌',
    'ユ':'⠬',
    'ヨ':'⠜',
    'ラ':'⠑',
    'リ':'⠓',
    'ル':'⠙',
    'レ':'⠛',
    'ロ':'⠚',
    'ワ':'⠄',
    'ヰ':'⠆',
    'ヱ':'⠖',
    'ヲ':'⠔',
    'ン':'⠴',
    'ッ':'⠂',
    # Two-cell entries: prefix cell ⠐ followed by the cell of the base kana.
    'ヴ':'⠐⠉',
    'ガ':'⠐⠡',
    'ギ':'⠐⠣',
    'グ':'⠐⠩',
    'ゲ':'⠐⠫',
    'ゴ':'⠐⠪',
    'ザ':'⠐⠱',
    'ジ':'⠐⠳',
    'ズ':'⠐⠹',
    'ゼ':'⠐⠻',
    'ゾ':'⠐⠺',
    'ダ':'⠐⠕',
    'ヂ':'⠐⠗',
    'ヅ':'⠐⠝',
    'デ':'⠐⠟',
    'ド':'⠐⠞',
    'バ':'⠐⠥',
    'ビ':'⠐⠧',
    'ブ':'⠐⠭',
    'ベ':'⠐⠯',
    'ボ':'⠐⠮',
    # Two-cell entries: prefix cell ⠠ followed by the cell of the ハ-row kana.
    'パ':'⠠⠥',
    'ピ':'⠠⠧',
    'プ':'⠠⠭',
    'ペ':'⠠⠯',
    'ポ':'⠠⠮',
    }
# Two-character katakana sequence (kana + small kana) -> two braille cells.
# translateWithInPos() tries this table before the single-character kana1_dic.
kana2_dic = {
    'イェ':'⠈⠋',
    'キャ':'⠈⠡',
    'キュ':'⠈⠩',
    'キェ':'⠈⠫',
    'キョ':'⠈⠪',
    'シャ':'⠈⠱',
    'スィ':'⠈⠳',
    'シュ':'⠈⠹',
    'シェ':'⠈⠻',
    'ショ':'⠈⠺',
    'チャ':'⠈⠕',
    'ティ':'⠈⠗',
    'チュ':'⠈⠝',
    'チェ':'⠈⠟',
    'チョ':'⠈⠞',
    'ニャ':'⠈⠅',
    'ニュ':'⠈⠍',
    'ニェ':'⠈⠏',
    'ニョ':'⠈⠎',
    'ヒャ':'⠈⠥',
    'ヒュ':'⠈⠭',
    'ヒェ':'⠈⠯',
    'ヒョ':'⠈⠮',
    'ミャ':'⠈⠵',
    'ミュ':'⠈⠽',
    'ミェ':'⠈⠿',
    'ミョ':'⠈⠾',
    'リャ':'⠈⠑',
    'リュ':'⠈⠙',
    'リェ':'⠈⠛',
    'リョ':'⠈⠚',
    'ギャ':'⠘⠡',
    'ギュ':'⠘⠩',
    'ギェ':'⠘⠫',
    'ギョ':'⠘⠪',
    'ジャ':'⠘⠱',
    'ズィ':'⠘⠳',
    'ジュ':'⠘⠹',
    'ジェ':'⠘⠻',
    'ジョ':'⠘⠺',
    'ヂャ':'⠘⠕',
    'ディ':'⠘⠗',
    'ヂュ':'⠘⠝',
    'ヂェ':'⠘⠟',
    'ヂョ':'⠘⠞',
    'ビャ':'⠘⠥',
    'ビュ':'⠘⠭',
    'ビェ':'⠘⠯',
    'ビョ':'⠘⠮',
    'テュ':'⠨⠝',
    'ピャ':'⠨⠥',
    'ピュ':'⠨⠭',
    'ピョ':'⠨⠮',
    'フュ':'⠨⠬',
    'フョ':'⠨⠜',
    'デュ':'⠸⠝',
    'ヴュ':'⠸⠬',
    'ヴョ':'⠸⠜',
    'ウァ':'⠢⠁',
    'ウィ':'⠢⠃',
    'ウェ':'⠢⠋',
    'ウォ':'⠢⠊',
    'クァ':'⠢⠡',
    'クィ':'⠢⠣',
    'クェ':'⠢⠫',
    'クォ':'⠢⠪',
    'ツァ':'⠢⠕',
    'ツィ':'⠢⠗',
    'トゥ':'⠢⠝',
    'ツェ':'⠢⠟',
    'ツォ':'⠢⠞',
    'ファ':'⠢⠥',
    'フィ':'⠢⠧',
    'フェ':'⠢⠯',
    'フォ':'⠢⠮',
    'グァ':'⠲⠡',
    'グィ':'⠲⠣',
    'グェ':'⠲⠫',
    'グォ':'⠲⠪',
    'ヅァ':'⠲⠕',
    'ヅィ':'⠲⠗',
    'ドゥ':'⠲⠝',
    'ヅェ':'⠲⠟',
    'ヅォ':'⠲⠞',
    'ヴァ':'⠲⠥',
    'ヴィ':'⠲⠧',
    'ヴェ':'⠲⠯',
    'ヴォ':'⠲⠮',
    }
# Symbol -> braille cell(s) used in ordinary (Japanese) mode.
# Some values end with spaces on purpose: the blank cell(s) that must follow
# the symbol.
#
# Keys that only differ in width from another key are written as \u escapes.
# Written as plain characters they collapse into duplicate keys, and the
# later duplicate silently replaces the ASCII entry ('|' and '_' would get
# the wrong cells).
jp_symbol_dic = {
    '+':'⠢',
    '-':'⠤',
    ':':'⠐⠂',
    '\\':'⠫', # yen mark
    '?':'⠢ ', # one space
    '@':'⠪',
    '<':'⠔⠔',
    '>':'⠢⠢',
    '=':'⠒⠒',
    '#':'⠰⠩',
    '$':'⠹',
    '%':'⠰⠏',
    '&':'⠰⠯',
    '*':'⠰⠡',
    ';':'⠆',
    '|':'⠳',
    '"':'⠶',
    # "'":'⠄',
    #'/':'⠌',
    '.':'⠲',
    '!':'⠖ ', # one space
    '^':'⠘',
    '`':'⠐⠑',
    '_':'⠐⠤',
    '~':'⠐⠉',
    'ー':'⠒',
    '、':'⠰ ', # one space
    '。':'⠲  ', # two spaces
    '・':'⠐ ', # one space
    '\uff5c':'⠶', # FULLWIDTH VERTICAL LINE
    '\uff3f':'⠤', # FULLWIDTH LOW LINE
    '「':'⠤',
    '」':'⠤',
    '『':'⠰⠤',
    '』':'⠤⠆',
    '\uff62':'⠤', # HALFWIDTH LEFT CORNER BRACKET
    '\uff63':'⠤', # HALFWIDTH RIGHT CORNER BRACKET
    '(':'⠶',
    ')':'⠶',
    '\uff08':'⠶', # FULLWIDTH LEFT PARENTHESIS
    '\uff09':'⠶', # FULLWIDTH RIGHT PARENTHESIS
    '[':'⠐⠶',
    ']':'⠶⠂',
    '“':'⠐⠶',
    '”':'⠶⠂',
    '{':'⠐⠶',
    '}':'⠶⠂',
    '‘':'⠐⠶',
    '’':'⠶⠂',
    '〔':'⠐⠶',
    '〕':'⠶⠂',
    '〈':'⠐⠶',
    '〉':'⠶⠂',
    '《':'⠐⠶',
    '》':'⠶⠂',
    '【':'⠐⠶',
    '】':'⠶⠂',
    '〝':'⠐⠶',
    '〟':'⠶⠂',
    '☆':'⠰⠮⠂',
    '★':'⠰⠮⠆',
    '○':'⠠⠵⠂',
    '●':'⠠⠵⠆',
    '◎':'⠠⠵⠲',
    '□':'⠠⠳⠂',
    '■':'⠠⠳⠆',
    '△':'⠠⠱⠂',
    '▲':'⠠⠱⠆',
    '▽':'⠰⠱⠂',
    '×':'⠰⠡⠂',
    '▼':'⠰⠱⠆',
    '◇':'⠨⠧⠂',
    '◆':'⠨⠧⠆',
    '※':'⠔⠔ ', # reference mark, cells 35-35 (followed by one blank cell)
    '→':' ⠒⠒⠕ ', # arrow: one blank cell before and after
    '←':' ⠪⠒⠒ ', # arrow: one blank cell before and after
    }
# Symbol -> braille cell used only while translateWithInPos() is in
# information-processing mode (info_mode).
info_symbol_dic = dict([
    (',', '⠂'),
    ('?', '⠐⠦'),
    ('+', '⠬'),
    ("'", '⠄'),
    ('.', '⠲'),
    ('!', '⠖'),
    ('(', '⠦'),
    (')', '⠴'),
    ('{', '⠣'),
    ('}', '⠜'),
    ('[', '⠷'),
    (']', '⠾'),
    ('%', '⠻'),
    ('&', '⠯'),
    ('#', '⠩'),
    ('*', '⠡'),
])
# ASCII digit -> braille cell (written after the number sign ⠼).
num_dic = dict(zip('0123456789', '⠚⠁⠃⠉⠙⠑⠋⠛⠓⠊'))
# Punctuation inside a number: decimal point and digit-group comma.
num_symbol_dic = dict([('.', '⠂'), (',', '⠄')])
# Latin letter -> braille cell. Upper- and lower-case letters share the same
# cells; translateWithInPos() emits the capital indicators itself.
alpha_dic = dict(zip(
    'abcdefghijklmnopqrstuvwxyz',
    '⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵'))
alpha_cap_dic = dict(zip(
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
    '⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵'))
-\r
def is_ara(c):
    """Return True when ``c`` is a kana from the ア row or the ラ row.

    These kana need a joining mark (⠤) when they directly follow a number.
    """
    needs_joiner = 'アイウエオラリルレロ'
    return c in needs_joiner
-\r
def translateWithInPos(text):
    """Translate kana/ASCII ``text`` to braille, keeping a position map.

    Returns ``(braille, inPos)``: ``braille`` is the output string and
    ``inPos[i]`` is the index, in the NFKC-normalized input, of the character
    that produced output character ``i``.

    Trailing blanks produced by the translation are stripped unless the input
    itself ends with a space. An empty input returns ``('', [])``.
    """
    retval = ''
    pos = 0
    latin = False # inside a run of Latin letters (letter sign already written)
    num = False # inside a number (number sign already written)
    capital = False # double-capital indicator is in effect
    quote_mode = False # inside a foreign-language quotation
    info_mode = False # information-processing braille mode
    text = unicodedata.normalize('NFKC', text)
    inPos = []

    while pos < len(text):
        # space
        if text[pos] == ' ':
            retval += ' '
            inPos.append(pos)
            capital = latin = num = False
            pos += 1
        # digits
        elif text[pos] in num_dic:
            latin = False
            if not num:
                retval += '⠼'
                inPos.append(pos)
                num = True
            while text[pos] in num_dic:
                retval += num_dic[text[pos]]
                inPos.extend([pos] * len(num_dic[text[pos]]))
                pos += 1
                if pos >= len(text):
                    break
        # info symbol
        elif info_mode and text[pos] in info_symbol_dic:
            retval += info_symbol_dic[text[pos]]
            inPos.extend([pos] * len(info_symbol_dic[text[pos]]))
            num = capital = False
            pos += 1
        # '.' or ',' inside a number, only when a digit follows
        elif num and text[pos] in num_symbol_dic and \
                pos+1 < len(text) and text[pos+1].isdigit():
            retval += num_symbol_dic[text[pos]]
            inPos.extend([pos] * len(num_symbol_dic[text[pos]]))
            pos += 1
        # half-width apostrophe: only written when a digit follows
        elif text[pos] == "'":
            if pos+1 < len(text) and text[pos+1].isdigit():
                retval += '⠼⠄'
                inPos.extend([pos, pos])
                num = True
            pos += 1
        # slash symbol
        elif text[pos] == '/':
            retval += '⠌'
            inPos.append(pos)
            num = capital = False
            pos += 1
        # Japanese symbols
        elif text[pos] in jp_symbol_dic:
            retval += jp_symbol_dic[text[pos]]
            inPos.extend([pos] * len(jp_symbol_dic[text[pos]]))
            latin = num = False
            pos += 1
        # lower/upper case alphabet
        elif text[pos] in alpha_dic or text[pos] in alpha_cap_dic:
            if not latin and not quote_mode:
                retval += '⠰'
                inPos.append(pos)
            elif info_mode and pos >= 2 and \
                    text[pos-2].isdigit() and \
                    text[pos-1] == '.' and \
                    text[pos] in 'abcdefghij':
                # In information-processing mode, a-j directly after
                # "<digit>." needs the letter sign again.
                retval += '⠰'
                inPos.append(pos)
            latin = True
            num = False
            # tpos: end of the run of consecutive letters
            tpos = pos
            upper_count = lower_count = 0
            while tpos < len(text):
                if text[tpos] in alpha_cap_dic:
                    upper_count += 1
                    tpos += 1
                elif text[tpos] in alpha_dic:
                    lower_count += 1
                    tpos += 1
                else:
                    break
            # two or more capitals and no lower case: double-capital indicator
            if upper_count > 1 and lower_count == 0:
                retval += '⠠⠠'
                inPos.extend([pos, pos])
                capital = True
            else:
                capital = False
            # translate the run of letters
            while pos < tpos:
                if not capital and text[pos] in alpha_cap_dic:
                    retval += '⠠'
                    inPos.append(pos)
                retval += alpha_dic[text[pos].lower()]
                inPos.append(pos)
                pos += 1
        # two kana characters
        elif pos+1 < len(text) and text[pos:pos+2] in kana2_dic:
            if latin:
                retval += '⠤'
                inPos.append(pos - 1) # the joining mark maps to the previous character
            elif num and is_ara(text[pos:pos+1]):
                retval += '⠤'
                inPos.append(pos - 1) # the joining mark maps to the previous character
            retval += kana2_dic[text[pos:pos+2]]
            inPos.extend([pos] * len(kana2_dic[text[pos:pos+2]]))
            latin = num = False
            pos += 2
        # one kana character
        elif text[pos] in kana1_dic:
            if latin:
                retval += '⠤'
                inPos.append(pos - 1) # the joining mark maps to the previous character
            elif num:
                if is_ara(text[pos]):
                    retval += '⠤'
                    inPos.append(pos - 1) # the joining mark maps to the previous character
                elif text[pos] == 'ワ' and pos+3 < len(text) and \
                        is_ara(text[pos+1]) and is_ara(text[pos+2]) and is_ara(text[pos+3]):
                    retval += '⠤'
                    inPos.append(pos - 1) # the joining mark maps to the previous character
            retval += kana1_dic[text[pos]]
            inPos.extend([pos] * len(kana1_dic[text[pos]]))
            latin = num = False
            pos += 1
        # braille patterns are passed through unchanged
        elif 0x2800 <= ord(text[pos]) and ord(text[pos]) <= 0x28ff:
            latin = False
            # number mode
            if text[pos] == '⠼':
                num = True
            else:
                num = False
            # foreign-language quotation mode on/off
            if not quote_mode and text[pos] == '⠦':
                quote_mode = True
            if quote_mode and text[pos] == '⠴':
                quote_mode = False
            # information-processing mode on/off
            if text[pos] == '⠠' and pos+1 < len(text):
                if text[pos+1] == '⠦':
                    info_mode = True
                elif text[pos+1] == '⠴':
                    info_mode = False

            if ord(text[pos]) == 0x2800:
                retval += ' ' # use 0x20
                inPos.append(pos)
            else:
                retval += text[pos]
                inPos.append(pos)
            pos += 1
        # anything else
        else:
            latin = num = False
            retval += '□'
            inPos.append(pos)
            pos += 1
    # rstrip with inPos
    outbuf = retval
    # "text and": text[-1] raises IndexError on an empty string.
    if text and text[-1] != ' ':
        while outbuf[-1:] == ' ':
            outbuf = outbuf[:-1]
            inPos.pop()
    return (outbuf, inPos)
+++ /dev/null
-# coding: UTF-8\r
-#translator2.py (Japanese Braille translator Phase 2)\r
-#A part of NonVisual Desktop Access (NVDA)\r
-#Copyright (C) 2012-2013 Takuya Nishimoto (NVDA Japanese Team)\r
-#This file is covered by the GNU General Public License.\r
-#See the file COPYING for more details.\r
-\r
-from __future__ import unicode_literals\r
-import os\r
-import copy\r
-from _nvdajp_unicode import unicode_normalize\r
-from mecab import *\r
-import translator1\r
-\r
# Default debug sink used as the ``logwrite`` argument throughout this module.
_logwrite = None

try:
    # Running inside NVDA: send debug output to the NVDA log.
    from logHandler import log
    _logwrite = log.debug
except Exception:
    # logHandler could not be imported (e.g. stand-alone use), so fall back
    # to printing.  ``Exception`` keeps the fallback best-effort without
    # swallowing SystemExit / KeyboardInterrupt like a bare ``except`` does.
    def __print(s):
        # Parenthesised form prints a single value identically on Python 2.
        print(s)
    _logwrite = __print
-\r
# Expressions that arrive as a single morpheme but are rewritten as two
# morphemes by replace_morphs().  Every replacement row has the layout
# [surface, reading, accent, hinshi1, hinshi2, hinshi3]; a None part-of-speech
# entry leaves the value of the original morpheme untouched.
_PARTICLE_NI = ('に', 'ニ', '0/1')
_VERB_SHITE = ('して', 'シテ', '0/2')

# (first morpheme, second morpheme) as (surface, reading, accent).  The
# dictionary key is always the two surfaces joined together.
_CONNECTED_PAIRS = (
    (_PARTICLE_NI, ('ついて', 'ツイテ', '1/3')),
    (_PARTICLE_NI, ('より', 'ヨリ', '0/2')),
    (_PARTICLE_NI, ('よる', 'ヨル', '0/2')),
    (_PARTICLE_NI, ('おいて', 'オイテ', '0/3')),
    (_PARTICLE_NI, ('おける', 'オケル', '0/3')),
    (_PARTICLE_NI, ('よって', 'ヨッテ', '0/3')),
    (_PARTICLE_NI, ('とって', 'トッテ', '0/3')),
    (_PARTICLE_NI, ('対して', 'タイシテ', '1/4')),
    (_PARTICLE_NI, ('関して', 'カンシテ', '1/4')),
    (_PARTICLE_NI, ('つき', 'ツキ', '1/2')),
    (('と', 'ト', '0/1'), ('いう', 'イウ', '0/2')),
    (('どう', 'ドー', '0/2'), _VERB_SHITE),
    (('と', 'ト', '1/1'), _VERB_SHITE),
)

CONNECTED_MORPHS = dict(
    (first[0] + second[0],
     [list(first) + [None, None, '*'], list(second) + ['動詞', '*', '*']])
    for first, second in _CONNECTED_PAIRS
)

# The only entry whose part-of-speech columns differ from the pattern above.
CONNECTED_MORPHS['なくなる'] = [
    ['なく', 'ナク', '2/2', None, None, None],
    ['なる', 'ナル', '1/2', '動詞', '自立', None],
]
-\r
class MecabMorph(object):
    """One morpheme together with the state used for braille translation.

    Every slot holds a string except ``sepflag``, which records whether a
    braille space is written after this morpheme.
    """

    __slots__ = ('hyouki', 'nhyouki', 'hinshi1', 'hinshi2', 'hinshi3', 'hinshi4',
                 'type1', 'type2', 'kihon',
                 'kana', 'yomi', 'accent', 'output', 'sepflag')

    def __init__(self):
        # hyouki is the surface form, nhyouki its Unicode-normalised form;
        # all string fields start out empty.
        for field in self.__slots__:
            setattr(self, field, '')
        self.sepflag = False  # insert a braille space after this morpheme?

    def is_substantive_word(self):
        """Return True for words that attach to a preceding word.

        NOTE(review): the original comment labels this test as "dependent
        word" (fuzokugo) although the method name says "substantive".
        """
        pos = self.hinshi1
        surface = self.hyouki
        if pos == '記号':
            return False
        if self.hinshi2 in ('接頭', '接尾'):
            return True
        if pos == '助動詞':
            # The auxiliary "ない" is the one auxiliary not treated as dependent.
            return surface != 'ない'
        if pos == '助詞':
            return True
        return (pos, surface) in (('名詞', 'の'), ('形容詞', 'なく'))

    def is_independent_word(self):
        """Return True for non-symbol words that is_substantive_word() rejects."""
        return self.hinshi1 != '記号' and not self.is_substantive_word()

    def write(self, logwrite):
        """Pass all fields to ``logwrite`` as one comma separated line."""
        template = ','.join(['%s'] * 13 + ['%d'])
        values = tuple(getattr(self, field) for field in self.__slots__)
        logwrite(template % values)
-\r
def update_phonetic_symbols(mo):
    """Put kana spellings back into ``mo.output`` where the reading lost them.

    ``mo.yomi`` (pronunciation) and ``mo.kana`` (kana spelling) are compared
    position by position and ``mo.output`` is patched at the same index:

    * a long-vowel mark in the reading whose spelling is one of ア/イ/エ is
      written with that vowel (reading アー, spelling アア -> output アア);
    * ジ/ズ in the reading whose spelling is ヂ/ヅ keeps the spelling
      (reading ツズル, spelling ツヅル -> output ツヅル).

    Only positions present in both strings are examined, so a spelling that
    is shorter than the reading no longer raises IndexError.

    @param mo: morpheme with ``yomi``, ``kana`` and ``output`` attributes;
        ``output`` is modified in place.
    """
    for p, (spoken, written) in enumerate(zip(mo.yomi, mo.kana)):
        restore_vowel = spoken == 'ー' and written in 'アイエ'
        restore_dzi = (spoken, written) in (('ジ', 'ヂ'), ('ズ', 'ヅ'))
        if restore_vowel or restore_dzi:
            mo.output = mo.output[:p] + written + mo.output[p + 1:]
-\r
def mecab_to_morphs(mf):
    """Convert MeCab feature strings into a list of MecabMorph objects.

    Each feature is decoded with ``CODE`` and split on commas.  Field 0 is
    the surface form, fields 1-7 fill the part-of-speech / conjugation /
    base-form attributes, fields 8-10 are kana, reading and accent, and
    field 12, when present, is a braille notation that is used as output
    instead of the reading.

    Every field is read only if it exists, so a short feature line no
    longer raises IndexError (previously field 4 was read behind a
    ``len > 3`` test and field 10 behind a ``len > 9`` test).

    @param mf: object exposing ``feature`` and ``size``; may be None.
    @return: list of morphemes, empty if ``mf`` carries no data.
    """
    li = []
    if mf is None or mf.feature is None or mf.size is None:
        return li
    simple_fields = (
        (1, 'hinshi1'), (2, 'hinshi2'), (3, 'hinshi3'), (4, 'hinshi4'),
        (5, 'type1'), (6, 'type2'), (7, 'kihon'),
    )
    for i in range(mf.size):
        s = string_at(mf.feature[i])
        if not s:
            continue
        ar = s.decode(CODE, 'ignore').split(",")
        count = len(ar)
        mo = MecabMorph()
        mo.hyouki = ar[0]
        mo.nhyouki = unicode_normalize(ar[0])
        for index, attr in simple_fields:
            if index < count:
                setattr(mo, attr, ar[index])
        if count > 9:
            mo.kana = ar[8]
            mo.yomi = ar[9]
            if count > 10:
                mo.accent = ar[10]
            if count > 12:
                # Use the braille notation from the dictionary's extended field.
                mo.output = ar[12]
            else:
                # NOTE(review): indentation was lost in this file; the
                # phonetic fix-up is assumed to apply only when the output
                # is derived from the reading - confirm against the original.
                mo.output = ar[9]
                update_phonetic_symbols(mo)
        mo.sepflag = False
        li.append(mo)
    return li
-\r
def replace_morphs(li, dic):
    """Expand morphemes whose surface form is a key of ``dic``.

    A matching morpheme is replaced by one deep copy per replacement row
    ``[surface, reading, accent, hinshi1, hinshi2, hinshi3]``.  Surface,
    reading (copied to output, kana and yomi) and accent always come from
    the row; a part-of-speech column overrides the copy only when it is
    truthy.  Morphemes without an entry are passed through unchanged.

    @param li: list of morphemes (not modified).
    @param dic: mapping such as CONNECTED_MORPHS.
    @return: new list of morphemes.
    """
    new_li = []
    for mo in li:
        # Direct dict membership; ``dic.keys()`` would build and scan a list
        # for every morpheme on Python 2.
        if mo.hyouki not in dic:
            new_li.append(mo)
            continue
        for row in dic[mo.hyouki]:
            m = copy.deepcopy(mo)
            m.hyouki = m.nhyouki = row[0]
            if row[3]:
                m.hinshi1 = row[3]
            if row[4]:
                m.hinshi2 = row[4]
            if row[5]:
                m.hinshi3 = row[5]
            m.output = m.kana = m.yomi = row[1]
            m.accent = row[2]
            new_li.append(m)
    return new_li
-\r
# Characters accepted as kanji numerals.  The formal numeral 弐 (2) is
# included: the previous class listed 二 twice instead, so text containing
# 弐 was rejected even though the converter knows its value.
RE_KANSUJI = re.compile('^[一二三四五六七八九〇零十拾百千壱弐参]+$')

# Value of each kanji digit, including the formal variants.
_KANSUJI_DIGITS = {
    '壱': 1, '一': 1, '二': 2, '弐': 2, '三': 3, '参': 3,
    '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
}

# http://programminblog.blogspot.jp/2010/11/python.html
def kansuji2arabic(text):
    """Convert a kanji numeral below ten thousand to an Arabic digit string.

    Both unit notation (二十五 -> '25', 二千四百 -> '2400') and positional
    notation (一九九八 -> '1998') are understood.

    @param text: candidate numeral.
    @return: the decimal digits as a string, or None if ``text`` contains
        anything other than the characters accepted by RE_KANSUJI.
    """
    if not RE_KANSUJI.match(text):
        return None
    result = 0
    digit = 1  # weight applied to the next digit character to the left
    # Scan from the last character to the first.
    for index in range(len(text) - 1, -1, -1):
        c = text[index]
        right = text[index + 1:index + 2]  # character just handled, '' at start
        if c in '〇零':
            digit *= 10
        elif c in '十拾':
            digit = 10
        elif c == '百':
            # A ten directly to the right had no multiplier: count it as 10.
            if digit == 10 and right and right in '十拾':
                result += digit
            digit = 100
        elif c == '千':
            # Same for a bare ten or a bare hundred to the right.
            if (digit == 10 and right and right in '十拾') or \
                    (digit == 100 and right == '百'):
                result += digit
            digit = 1000
        else:
            result += _KANSUJI_DIGITS.get(c, 0) * digit
            digit *= 10
    # A leading unit without multiplier (十七, 百, 千...) still counts once.
    head = text[:1]
    if (digit == 10 and head in '十拾') or \
            (digit == 100 and head == '百') or \
            (digit == 1000 and head == '千'):
        result += digit
    return '%d' % result
-\r
def rewrite_number(li):
    """Return deep copies of the morphemes with kanji numerals as digits.

    For every morpheme that is not a proper noun, the surface form is given
    to kansuji2arabic(); a non-empty result becomes the copy's output.

    @param li: list of morphemes (not modified).
    @return: new list of copied morphemes.
    """
    def _converted(source):
        clone = copy.deepcopy(source)
        arabic = None
        if clone.hinshi2 != '固有名詞':
            arabic = kansuji2arabic(clone.hyouki)
        if arabic:
            clone.output = arabic
        return clone

    return [_converted(mo) for mo in li]
-\r
def concatinate_morphs(li):
    """Merge a non-empty list of morphemes into one new morpheme.

    The result is a deep copy of the first element whose surface fields
    (hyouki, nhyouki) hold the joined surfaces and whose reading fields
    (yomi, kana, output) hold the joined ``yomi`` values.

    @param li: morphemes to merge; must contain at least one element.
    @return: the merged copy; the inputs are left untouched.
    """
    merged = copy.deepcopy(li[0])
    merged.hyouki = merged.nhyouki = ''.join([part.hyouki for part in li])
    merged.yomi = merged.kana = merged.output = ''.join([part.yomi for part in li])
    return merged
-\r
def replace_digit_morphs(li):
    """Join runs of consecutive number morphemes into single morphemes.

    Examples (surface forms):
        十, 七            -> 十七
        二, 十, 五        -> 二十五
        三, 兆, 二, 千, 四, 百, 万 -> 三, 兆, 二千四百, 万
        (so that it can later be written as "3チョー 2400マン")

    A run is made of morphemes tagged as number (hinshi2) that are a comma,
    a non-digit numeral other than the large units / separators listed
    below, or a single digit.  Anything else ends the current run.

    @param li: list of morphemes.
    @return: new list with each run replaced by concatinate_morphs(run).
    """
    merged = []
    pending = []  # number morphemes collected for the current run

    def flush():
        if pending:
            merged.append(concatinate_morphs(pending))
            del pending[:]

    for mo in li:
        is_number = mo.hinshi2 == '数'
        if is_number and mo.hyouki == ',':
            # Comma inside a number.
            comma = copy.deepcopy(mo)
            comma.yomi = comma.output = ','
            pending.append(comma)
        elif is_number and not mo.output.isdigit() and \
                mo.hyouki not in ('・', '万', '億', '兆', '京', '.'):
            # Kanji numeral: joined as is.
            pending.append(mo)
        elif is_number and mo.hyouki in '0123456789':
            # Arabic digit: joined in its normalised form.
            digit = copy.deepcopy(mo)
            normalized = unicode_normalize(digit.hyouki)
            digit.output = digit.hyouki = digit.nhyouki = digit.yomi = normalized
            pending.append(digit)
        else:
            flush()
            merged.append(mo)
    flush()
    return merged
-\r
# One or more ASCII letters and nothing else.
RE_ALPHA = re.compile('^[A-Za-z]+$')


def is_alpha(s):
    """Return a match object if ``s`` is made of ASCII letters only, else None."""
    matched = RE_ALPHA.match(s)
    return matched
-\r
RE_ASCII_SYMBOLS = re.compile('^[\,\.\:\;\!\?\@\#\\\$\%\&\*\|\+\-\/\=\<\>\"\'\^\`\_\~]+$')

def replace_alphabet_morphs(li):
    """Join words written only with letters and symbols into one morpheme.

    Symbols that form part of a computer-notation string are attached to the
    neighbouring letters as well.  Example (surface forms):
        B, asi, c -> Basic  (tagged 名詞 / アルファベット)

    Each joined run gets its normalised surface form as output.

    @param li: list of morphemes.
    @return: new list with every run replaced by the joined morpheme.
    """
    result = []
    run = []  # morphemes of the run currently being collected

    def close_run():
        if not run:
            return
        word = concatinate_morphs(run)
        word.hinshi1 = '名詞'
        word.hinshi2 = 'アルファベット'
        word.nhyouki = word.output = unicode_normalize(word.nhyouki)
        result.append(word)
        del run[:]

    def belongs_to_run(mo, next_mo):
        text = mo.nhyouki
        # Letters and these symbols may start a run or continue one.
        if is_alpha(text):
            return True
        if text in r',+@/#$%&*;<' or text == '\\':
            return True
        if text[0] in r',+@/#$%&*;' and RE_ASCII_SYMBOLS.match(text):
            return True
        # Everything below only continues a run that is already open.
        if not run:
            return False
        if text in ',.' and (not next_mo or next_mo.nhyouki == ' ' or
                             next_mo.hinshi1 in ('助詞', '助動詞')):
            return True
        if text == ' ' and next_mo and is_alpha(next_mo.nhyouki):
            return True
        if text.isdigit():
            return True
        return text in ',.:;!?@#\\$%&*|+-/=<>"\'^`_~{}[]'

    count = len(li)
    for pos, mo in enumerate(li):
        next_mo = li[pos + 1] if pos + 1 < count else None
        if belongs_to_run(mo, next_mo):
            run.append(mo)
        else:
            close_run()
            result.append(mo)
    close_run()
    return result
-\r
# Native Japanese readings for days of the month.
# Assumes the ``output`` attribute already holds half-width digits.

# Reading used when the day counter '日' is merged with the preceding number.
WAGO_DIC = {
    '1': 'ツイタチ',
    '2': 'フツカ',
    '3': 'ミッカ',
    '4': 'ヨッカ',
    '5': 'イツカ',
    '6': 'ムイカ',
    '7': 'ナノカ',
    '8': 'ヨーカ',
    '9': 'ココノカ',
    '10': 'トオカ',
    '20': 'ハツカ',
}

def fix_japanese_date_morphs(li):
    """Apply native day-of-month readings to ``<number> 日`` sequences.

    For a counter morpheme '日' that follows another morpheme:

    * after 14 / 24 (in any of the listed spellings) the counter itself is
      kept and its output is set to 'カ' (this modifies the input object);
    * if the previous output is a key of WAGO_DIC, the previous entry of
      the result is replaced by a copy of the counter carrying the whole
      reading (e.g. 1 + 日 -> ツイタチ), tagged hinshi2 = '日付';
    * otherwise the counter is passed through.

    @param li: list of morphemes.
    @return: new list of morphemes.
    """
    fixed = []
    previous = None
    for current in li:
        is_day_counter = (current.hyouki == '日' and
                          current.hinshi3 == '助数詞' and
                          previous is not None)
        if is_day_counter and \
                previous.hyouki in ('14', '24', '十四', '一四', '二四', '二十四'):
            current.output = 'カ'
            fixed.append(current)
        elif is_day_counter and previous.output in WAGO_DIC:
            day = copy.deepcopy(current)
            day.output = WAGO_DIC[previous.output]
            day.hyouki = day.nhyouki = day.kana = day.yomi = day.output
            day.hinshi2 = '日付'
            day.hinshi3 = '*'
            # FIXME: day.accent is not updated
            fixed[-1] = day  # takes the place of the number morpheme
        else:
            fixed.append(current)
        previous = current
    return fixed
-\r
def should_separate(prev2_mo, prev_mo, mo, next_mo):
    """Decide whether a braille space is written between ``prev_mo`` and ``mo``.

    The rules are checked strictly from top to bottom and the first rule
    that matches decides, so specific exceptions come before general rules.

    Fixes compared with the previous revision:
      * ``kihon in ('てる')`` / ``kihon in ('だ')`` were substring tests on a
        string (the parentheses do not make a tuple), so an empty base form
        also matched; they are now equality tests.
      * first/last character tests on ``nhyouki`` use slices so that an
        empty string no longer raises IndexError.

    NOTE(review): this file lost its indentation; the nesting of the grouped
    rules below was reconstructed from their meaning - please confirm.

    @param prev2_mo: morpheme two positions before ``mo``, or None.
    @param prev_mo: morpheme directly before ``mo``.
    @param mo: morpheme that would follow the space.
    @param next_mo: morpheme after ``mo``, or None.
    @return: True to separate, False to write ``mo`` right after ``prev_mo``.
    @rtype: bool
    """
    small_kana = 'ぁぃぅぇぉっゃゅょゎァィゥェォッャュョヮヵヶ'

    # Long-vowel marks and small kana never start a new word.
    if mo.hyouki == 'ー':
        return False
    if prev_mo.hyouki == 'ー':
        return False
    if mo.hyouki in small_kana:
        return False

    # After a closing bracket:  ")(" and ")noun" are separated,
    # but an apostrophe before digits ('02) is not.
    if prev_mo.hinshi2 == '括弧閉' and prev_mo.nhyouki != "’":
        if mo.hinshi2 == '括弧開':
            return True
        if mo.hinshi1 == '名詞':
            return True

    # 東京/都 千代田/区
    if prev_mo.hinshi2 == '接尾' and prev_mo.hinshi3 == '地域' and \
            mo.hinshi2 == '固有名詞' and mo.hinshi3 == '地域':
        return True
    # 東京/都 交通/局
    if prev_mo.hinshi2 == '接尾' and prev_mo.hinshi3 == '地域' and \
            mo.hinshi1 == '名詞' and mo.hinshi2 == '一般':
        return True
    # 永田町 1
    if prev_mo.hinshi2 == '固有名詞' and prev_mo.hinshi3 == '地域' and \
            mo.hinshi2 == '数':
        return True

    # 晴れ/所に より
    if prev_mo.hinshi1 == '名詞' and mo.hyouki == '所により':
        return True

    # 一時/雨
    if prev_mo.hyouki == '一時' and mo.hyouki == '雨':
        return True

    # Words that take a space before a following number.
    if prev_mo.nhyouki in ('零下', '西暦', 'ボーイング', 'ベスト', 'ルイ', '先', '振替', 'No.', '一人当り') \
            and mo.output.isdigit():
        return True

    # 1月/1日
    if prev_mo.nhyouki[:1].isdigit() and prev_mo.nhyouki[-1:] == '月' and mo.output.isdigit():
        return True
    # 0/4月 -> 04月
    if prev_mo.output.isdigit() and mo.nhyouki[:1].isdigit():
        return False

    # A unit directly after Arabic digits stays attached
    # (e.g. 30 + センチメートル).
    if prev_mo.output.isdigit():
        if mo.hinshi3 == '助数詞':
            return False
        if mo.hyouki == 'センチメートル':
            return False
        if mo.nhyouki == '#':
            return False

    # 数% ("several percent")
    if prev_mo.hyouki == '数' and prev_mo.yomi == 'スー' and mo.hyouki == '%':
        return False

    # Digits after a large unit are separated: 3チョー 2400マン
    if prev_mo.hyouki in ('億', '兆', '京') and mo.output.isdigit():
        return True

    # A morpheme starting with small kana is the rest of a split word
    # (ち + ゅうりっぷ).  ``mo.hyouki`` cannot be empty here: an empty
    # string already returned above.
    if mo.hyouki[0] in small_kana:
        return False

    # Particle / auxiliary after a closing foreign-language quote mark.
    if prev_mo.output and prev_mo.output.endswith('⠴') and mo.hinshi1 in ('助詞', '助動詞'):
        return True

    if prev_mo.hinshi1 == '名詞' and prev_mo.hinshi2 == '接尾':
        # Arabic digits after a counter are separated:
        #   0 時 / 15 分,   1 人 当り / 10 個
        if mo.output.isdigit():
            return True
        if mo.hinshi1 == '動詞' and mo.hinshi2 == '非自立':
            return False

    # Particle / auxiliary after an alphabetic word (CD / を).
    if is_alpha(prev_mo.nhyouki) and mo.hinshi1 in ('助詞', '助動詞'):
        return True

    # Particle after a period.
    if prev_mo.nhyouki.endswith('.') and mo.hinshi1 == '助詞':
        return True

    # Particle after a number sign.
    if prev_mo.nhyouki == '#' and mo.hinshi1 == '助詞':
        return True

    # Alphabetic noun after a noun.
    if prev_mo.hinshi1 == '名詞' and is_alpha(mo.nhyouki):
        return False

    # Noun before an opening bracket: separated (映画 「), except before
    # a plain parenthesis (機関().
    if prev_mo.hinshi1 == '名詞' and mo.hinshi2 == '括弧開' and mo.nhyouki != '(':
        return True

    # "ない": the braille guide treats the "ない" of 問題ない as an adjective
    # while MeCab tags it as an auxiliary, hence the two rule groups.
    #   間違い / なし,  味気 / ない,  良く / ない,  で / は / なく
    if mo.hinshi1 == '形容詞' and mo.kihon in ('ない', '無い'):
        # The adjective is separated unless it forms a compound with the
        # preceding word.
        if prev_mo.kihon in ('隈', '心置き', '満遍'):
            return False
        return True
    if mo.hinshi1 == '助動詞' and mo.kihon in ('ない', '無い'):
        if prev_mo.hinshi1 == '助詞' and prev_mo.kihon == 'は':
            return True
        if prev_mo.hinshi1 == '形容詞' and prev_mo.kihon == '良い':
            return True
        if prev_mo.hinshi1 == '名詞' and prev_mo.hinshi2 == 'ナイ形容詞語幹' and \
                prev_mo.kihon in ('問題', '間違い'):
            return True
        if prev_mo.hinshi2 == '副助詞':  # じゃない
            return True
        if prev_mo.hinshi1 == '動詞' and prev_mo.hinshi2 == '非自立' and \
                prev_mo.kihon == 'てる':  # てない
            return True
        if prev_mo.hinshi1 == '助動詞' and prev_mo.kihon == 'だ':  # でない
            return True
        return False

    # お / 黙り / なさい: after "お" + verb stem used as a noun,
    # なさい / なさる may be separated.
    if prev2_mo and prev2_mo.hinshi1 == '接頭詞' and prev2_mo.hyouki == 'お' and \
            prev_mo.hinshi1 == '動詞' and prev_mo.type2 == '連用形' and \
            mo.kihon == 'なさる':
        return True

    # 労 / せ / ず
    if prev_mo.hinshi1 == '名詞' and mo.hyouki == 'せ' and mo.kihon == 'する':
        return False

    # Literary "して" used like a particle attaches to the preceding word:
    #   不幸にして, 今にして, 居ながらにして, 労せずして, 若くして, 私をして
    if mo.hyouki == 'し' and mo.kihon == 'する':
        if prev_mo.hyouki == 'ず' and prev_mo.hinshi1 == '助動詞':
            return False
        if prev_mo.hinshi1 == '形容詞' and prev_mo.type2 == '連用テ接続':
            return False
        if prev_mo.hinshi2 == '接続助詞':
            return False
        if prev_mo.type1 == '文語・ベシ':
            return False
        if next_mo and next_mo.hyouki == 'て':
            if prev_mo.hyouki == 'に' and prev_mo.hinshi1 == '助詞':
                return False
            if prev2_mo and prev2_mo.hyouki == '私' and prev_mo.hyouki == 'を':
                return False

    # The "こと" of "...ですこと" is a suffix and attaches.
    if prev_mo.hyouki == 'です' and mo.hyouki == 'こと':
        return False

    # この程, この度, そのくせ ...: fused with the following word.
    if prev_mo.hyouki == 'この' and mo.hyouki in ('程', '度'):
        return False
    if prev_mo.hyouki == 'その' and mo.hyouki in ('くせ', 'うち', 'まま'):
        return False
    if prev_mo.hyouki == 'わが' and mo.hyouki == 'まま':
        return False

    if prev_mo.hinshi1 == '名詞' and mo.hinshi1 == '名詞':
        if mo.hinshi2 == '数':
            return False
        # Personal name followed by a name suffix.
        if prev_mo.hinshi4 in ('姓', '名') and mo.hinshi2 == '接尾' and mo.hinshi3 == '人名':
            return True
        # Inside a compound noun, a part of two morae or fewer is not cut off.
        if not prev_mo.hinshi2 in ('数', 'アルファベット') and not mo.hinshi2 in ('数', 'アルファベット'):
            if len(prev_mo.yomi) <= 2 and len(mo.yomi) >= 3:
                return False
            if len(prev_mo.yomi) >= 3 and len(mo.yomi) <= 2:
                return False
        # NOTE(review): assumed to sit at noun-noun level, not inside the
        # mora test above - confirm against the original indentation.
        if mo.hinshi2 != '接尾':
            return True

    if prev_mo.hinshi1 == '形容詞' and mo.hyouki == 'ん':
        return False  # いいんですけど

    if prev_mo.hinshi1 == '動詞' and prev_mo.hyouki == '見':
        return False  # 見/まごう
    if prev_mo.hinshi1 == '動詞' and prev_mo.hinshi2 == '自立':
        if mo.hyouki == 'および':
            return True
        if mo.hinshi1 == '動詞' and mo.hinshi2 == '非自立':
            return False

    # その / よう
    if prev_mo.hinshi1 == '連体詞' and mo.hinshi3 == '助動詞語幹':
        return False

    if prev_mo.hinshi1 == '接頭詞' and prev_mo.hyouki == '超' and mo.hinshi1 == '名詞':
        return True

    # お兄さん, お姉さん
    if prev_mo.hinshi1 == '接頭詞' and prev_mo.hyouki == 'お':
        return False
    # 新/東京/名所
    if prev_mo.hinshi1 == '接頭詞' and prev_mo.hinshi2 == '名詞接続' and \
            mo.hinshi1 == '名詞' and mo.hinshi2 == '固有名詞':
        return True
    if prev_mo.hinshi1 == '接頭詞' and mo.hinshi1 == '名詞':
        return False

    if prev_mo.hinshi1 == '助動詞' and prev_mo.hyouki == 'で' and mo.hinshi1 == '助動詞':
        return True

    if prev_mo.hinshi1 == '助詞' and mo.hyouki == 'よう':
        return False  # のように
    if prev_mo.hinshi1 == '助詞' and mo.hinshi1 == '接頭詞':
        return True

    # General rule: a space goes before every independent word.
    if prev_mo.is_substantive_word() and mo.is_independent_word():
        return True
    if prev_mo.is_independent_word() and mo.is_independent_word():
        return True
    return False
-\r
def morphs_to_string(li, inbuf, logwrite):
    """Join morpheme outputs into one string and map it back to the input.

    Morphemes with an empty output are skipped.  A space is added after
    every morpheme whose ``sepflag`` is set.  For each output character the
    returned list holds the index of the corresponding character of the
    concatenated surface forms:

    * same length: one to one;
    * output wrapped in computer-notation marks (2 + 2 cells) or
      foreign-language quote marks (1 + 1 cell): the marks map to the first
      / last wrapped character;
    * otherwise positions are spread proportionally;
    * a separating space maps to the character before it.

    Trailing spaces are removed unless the input itself ends with a space.
    An empty ``inbuf`` is now accepted (it used to raise IndexError).

    @param li: list of morphemes.
    @param inbuf: original input text, only inspected for a trailing space.
    @param logwrite: unused; kept for interface compatibility.
    @return: tuple (output string, list of input positions).
    """
    pieces = []
    inpos2 = []
    p = 0  # index of the current morpheme's first surface character
    for mo in li:
        out = mo.output
        if not out:
            continue
        outlen = len(out)
        hyolen = len(mo.hyouki)
        pieces.append(out)
        if hyolen == outlen:
            inpos2.extend(range(p, p + outlen))
        elif out[:2] == '⠠⠦' and out[-2:] == '⠠⠴':
            # Inside computer-notation braille marks.
            c = outlen - 4
            inpos2.extend([p] * 2)
            inpos2.extend(range(p, p + c))
            inpos2.extend([p + c - 1] * 2)
        elif out[:1] == '⠦' and out[-1:] == '⠴':
            # Inside foreign-language quote marks.
            c = outlen - 2
            inpos2.extend([p])
            inpos2.extend(range(p, p + c))
            inpos2.extend([p + c - 1])
        else:
            # Surface and output differ in length.
            for x in range(outlen):
                inpos2.append(p + int(float(x) * hyolen / outlen))
        p += hyolen
        if mo.sepflag:
            pieces.append(' ')
            # The space belongs to the preceding character.
            inpos2.append(p - 1 if p > 0 else p)
    outbuf = ''.join(pieces)
    # Right-strip the output together with its position entries.
    if not inbuf or inbuf[-1] != ' ':
        while outbuf[-1:] == ' ':
            outbuf = outbuf[:-1]
            inpos2.pop()
    return (outbuf, inpos2)
-\r
RE_MB_ALPHA_NUM_SPACE = re.compile('^[0-9A-Za-z 0-9A-Za-z ]+$')
RE_ASCII_CHARS = re.compile('^[A-Za-z0-9\.\,\-\+\:\/\~\?\&\%\#\*\$\; ]+$')
RE_INFOMATION = re.compile('^[A-Za-z0-9\+\@\/\#\$\%\&\*\;\.\<\>\-\_\{\}\[\] ]+$')
RE_GAIJI = re.compile('^[A-Za-z][A-Za-z0-9\,\.\+\- ]+$')
RE_KATAKANA = re.compile('^[ァ-ヾ]+$')
RE_HIRAGANA = re.compile('^[ぁ-ゞ]+$')

def japanese_braille_separate(inbuf, logwrite):
    """Turn Japanese text into a word-separated kana reading (phase 2).

    Text made only of letters, digits and spaces is normalised and
    returned directly.  Otherwise the text is analysed with MeCab and the
    resulting morphemes go through a fixed sequence of rewriting passes
    before should_separate() decides where the braille spaces go.

    @param inbuf: text to translate.
    @param logwrite: callable receiving debug lines.
    @return: tuple (separated reading, list mapping each output character
        to a position in the morpheme surface text).
    """
    text = inbuf
    if RE_MB_ALPHA_NUM_SPACE.match(text):
        outbuf = unicode_normalize(text)
        return (outbuf, range(len(outbuf)))

    # Voiced-mark combinations are fixed up before analysis because Unicode
    # normalisation would insert a space; the last pair handles 'ふにゃ~'.
    for before, after in (
            ('あ゛', 'あ'),
            ('ヱ゛', 'ヴェ'),
            ('ヲ゛', 'ヴォ'),
            ('ワ゛', 'ヴァ'),
            ('ゃ~', 'ゃー')):
        text = text.replace(before, after)

    # Morphological analysis.
    mecab_text = Mecab_text2mecab(text)
    mf = MecabFeatures()
    Mecab_analysis(mecab_text, mf)
    Mecab_correctFeatures(mf)
    Mecab_print(mf, logwrite, output_header = False)
    li = mecab_to_morphs(mf)
    del mf

    for mo in li:
        if mo.hinshi1 == '空白':
            mo.output = ' '
        elif mo.hinshi2 == '数' and mo.nhyouki.isdigit():
            # Digits (not kanji numerals) are output as written.
            mo.output = mo.nhyouki

    li = rewrite_number(replace_digit_morphs(replace_morphs(li, CONNECTED_MORPHS)))

    for mo in li:
        # The auxiliary "う" is written as a long-vowel mark.
        if mo.hyouki == 'う' and mo.hinshi1 == '助動詞':
            mo.output = 'ー'
        # Single letters are output as letters, not as their reading (エイ -> a).
        if mo.hinshi2 == 'アルファベット':
            mo.output = mo.nhyouki

    li = replace_alphabet_morphs(li)

    for mo in li:
        # Double-prime quotation marks act as brackets.
        if mo.hyouki == '〝':
            mo.hinshi1 = '記号'
            mo.hinshi2 = '括弧開'
        if mo.hyouki == '〟':
            mo.hinshi1 = '記号'
            mo.hinshi2 = '括弧閉'
        if mo.hinshi2 in ('括弧開', '括弧閉'):
            mo.output = mo.nhyouki
        # NOTE(review): the original comment calls this a "full shape space";
        # the literal is reproduced as it appears in this file.
        if mo.hyouki == ' ':
            mo.output = ' '

    # A closing single quote directly before a number becomes an ASCII
    # apostrophe ('02).
    for current, following in zip(li, li[1:]):
        if current.hyouki == '’' and following.hinshi2 == '数':
            current.output = "'"

    # A Japanese comma or middle dot between two digit strings becomes the
    # braille number sign.
    for before, middle, after in zip(li, li[1:], li[2:]):
        if before.output.isdigit() and \
                middle.hyouki in ('、', '・') and \
                after.output.isdigit():
            middle.output = '⠼'

    small_to_large = {
        'ァ': 'ア', 'ィ': 'イ', 'ゥ': 'ウ', 'ェ': 'エ', 'ォ': 'オ', 'ッ': 'ツ',
        'ャ': 'ヤ', 'ュ': 'ユ', 'ョ': 'ヨ', 'ヮ': 'ワ', 'ヵ': 'カ', 'ヶ': 'ケ',
    }

    # The remaining rewrites only look at one morpheme at a time, so they
    # are applied per morpheme in their original order.
    for mo in li:
        # Symbols use their Unicode-normalised form.
        if mo.hinshi1 == '記号' and mo.hinshi2 == '一般':
            mo.output = mo.nhyouki
        if mo.hyouki == '.' and mo.hinshi1 == '名詞' and mo.hinshi2 == '数':
            mo.output = '.'
        if mo.hyouki == ',' and mo.hinshi1 == '名詞' and mo.hinshi2 == '数':
            mo.output = ','
        if mo.hinshi1 == '記号' and mo.hinshi2 == '句点' and mo.nhyouki == '.':
            mo.output = '.'
        if mo.hinshi1 == '記号' and mo.hinshi2 == '読点' and mo.nhyouki == ',':
            mo.output = ','

        # Words written in ASCII keep their spelling (ab, No.).
        if RE_ASCII_CHARS.match(mo.nhyouki):
            mo.output = mo.nhyouki

        # Unknown kana words have no reading: use the katakana spelling,
        # converting hiragana to katakana by code point offset.
        if not mo.output and mo.nhyouki != 'ー':
            if RE_KATAKANA.match(mo.nhyouki):
                mo.output = mo.nhyouki
            elif RE_HIRAGANA.match(mo.nhyouki):
                mo.output = ''.join([unichr(ord(c) + 0x60) for c in mo.nhyouki])

        # A word that is just one small katakana is output full size.  The
        # surface form is left alone so that should_separate() still sees a
        # small kana.
        mo.output = small_to_large.get(mo.output, mo.output)

        surface = mo.nhyouki
        # NOTE(review): grouping follows Python precedence of the original
        # expression ("and" binds tighter than "or"); '://' and a backslash
        # trigger the marks even when the pattern does not match.
        if (RE_INFOMATION.match(surface) and '@' in surface) or \
                '://' in surface or '\\' in surface:
            # Computer-notation braille: opening and closing marks.
            mo.output = '⠠⠦' + surface + '⠠⠴'
        elif (RE_GAIJI.match(surface) and ' ' in surface) or \
                ('.' in surface and len(surface) > 3):
            # Foreign-language quote marks.  A single word without spaces is
            # written with the foreign-letter sign instead.
            mo.output = '⠦' + surface + '⠴'

        # Outside both kinds of marks an ampersand gets spaces around it.
        marked = mo.output.startswith('⠠⠦') or mo.output.startswith('⠦')
        if not marked and mo.output != '&':
            if mo.output.startswith('&'):      # &xx
                mo.output = mo.output.replace('&', '& ')
            elif mo.output.endswith('&'):      # xx&
                mo.output = mo.output.replace('&', ' &')
            else:                              # xx&xx
                mo.output = mo.output.replace('&', ' & ')

    # Native readings for days of the month.
    li = fix_japanese_date_morphs(li)

    # Decide the word separation.
    for i in range(1, len(li)):
        prev2_mo = li[i - 2] if i >= 2 else None
        next_mo = li[i + 1] if i + 1 < len(li) else None
        li[i - 1].sepflag = should_separate(prev2_mo, li[i - 1], li[i], next_mo)

    for mo in li:
        mo.write(logwrite)
    logwrite('')

    return morphs_to_string(li, inbuf, logwrite)
-\r
# Set by initialize(), cleared by terminate(); checked by translateWithInPos2().
mecab_initialized = False

def initialize(jtalk_dir=None, logwrite=_logwrite):
    """Initialise MeCab and mark this module as ready.

    @param jtalk_dir: passed on to Mecab_initialize when truthy; otherwise
        Mecab_initialize is called with the logger only.
    @param logwrite: callable receiving debug lines; may be falsy.
    """
    global mecab_initialized
    extra_args = (jtalk_dir,) if jtalk_dir else ()
    Mecab_initialize(logwrite, *extra_args)
    if logwrite:
        logwrite("initialize() done.")
    mecab_initialized = True
-\r
def terminate():
    """Log the shutdown and clear the initialised flag.

    Nothing else is released here; the flag only makes the next translation
    call initialize() again.
    """
    global mecab_initialized
    if _logwrite:
        _logwrite("terminate() done.")
    mecab_initialized = False
-\r
def translateWithInPos2(inbuf, logwrite=_logwrite):
    """Run both translation phases, initialising MeCab on first use.

    @param inbuf: text to translate.
    @param logwrite: callable receiving debug lines.
    @return: tuple (separated reading, braille with every '□' replaced by a
        space, phase 1 position list, phase 2 position list).
    """
    if not mecab_initialized:
        initialize()
    separated, inpos2 = japanese_braille_separate(inbuf, logwrite)
    braille, inpos1 = translator1.translateWithInPos(separated)
    return (separated, braille.replace('□', ' '), inpos1, inpos2)
-\r
# for brailleViewer
def getReadingAndBraille(text, logwrite=_logwrite):
    """Return the first two results of translateWithInPos2: (reading, braille)."""
    translated = translateWithInPos2(text, logwrite=logwrite)
    return translated[0:2]
-\r
# returns '\u2801\u2802\u2803\u2804\u2805\u2806\u2807'
def japaneseToUnicodeBraille(text, logwrite=_logwrite):
    """Return the Unicode braille translation of ``text``.

    translateWithInPos2() returns (reading, braille, inpos1, inpos2); the
    braille string is element 1.  Element 0, which was returned before, is
    the kana reading and contradicts this function's name and comment.
    """
    return translateWithInPos2(text, logwrite=logwrite)[1]
-\r
def makeOutPos(inPos, inlen, outlen):
    """Build the input-to-output position list from the inverse mapping.

    @param inPos: for each output position, the input position it came from.
    @param inlen: length of the input.
    @param outlen: number of entries of ``inPos`` to use.
    @return: list of ``inlen`` output positions.  An input position takes
        the first output position that refers to it (input position 0 takes
        the last one); positions nothing refers to inherit the value of the
        nearest earlier position, or 0.
    """
    outPos = [-1] * inlen
    for out_index in range(outlen):
        in_index = inPos[out_index]
        if in_index >= inlen:
            continue
        if in_index == 0 or outPos[in_index] == -1:
            outPos[in_index] = out_index
    # Fill the gaps with the last value seen.
    last_seen = 0
    for in_index, mapped in enumerate(outPos):
        if mapped == -1:
            outPos[in_index] = last_seen
        else:
            last_seen = mapped
    return outPos
-\r
def mergePositionMap(inpos1, inpos2, outlen, inlen):
    """Compose the position lists of both translation phases.

    @param inpos1: phase 1 list (output position -> intermediate position).
    @param inpos2: phase 2 list (intermediate position -> input position).
    @param outlen: length of the final output.
    @param inlen: length of the original input.
    @return: tuple (inPos, outPos) for the whole translation.
    """
    inPos = [inpos2[inpos1[p]] for p in range(outlen)]
    outPos = makeOutPos(inPos, inlen, outlen)
    return inPos, outPos
-\r
# louis-compatible method
# tableList, typeform are not supported.
# mode=dotsIO is default.
def translate(inbuf, cursorPos=0, logwrite=_logwrite, unicodeIO=False):
    """Translate a string of characters, providing position information.

    Unless ``unicodeIO`` is set, spaces in the braille become U+2800 and
    every cell is moved from the U+28xx block to the 0x80xx range.

    @param inbuf: The string to translate.
    @type inbuf: str
    @param cursorPos: The position of the cursor in inbuf.
    @type cursorPos: int
    @param logwrite: callable receiving debug lines.
    @param unicodeIO: if true, return Unicode braille characters unchanged.
    @return: A tuple of:
        the translated string,
        a list of input positions for each position in the output,
        a list of output positions for each position in the input, and
        the position of the cursor in the output.
    @rtype: (str, list of int, list of int, int)
    @raise RuntimeError: If a complete translation could not be done.
    """
    reading, braille, inpos1, inpos2 = translateWithInPos2(inbuf, logwrite=logwrite)
    if not unicodeIO:
        cells = braille.replace(' ', '\u2800')
        braille = ''.join([unichr((ord(cell) - 0x2800) + 0x8000) for cell in cells])
    inPos, outPos = mergePositionMap(inpos1, inpos2, len(braille), len(inbuf))
    return (braille, inPos, outPos, outPos[cursorPos])
+++ /dev/null
-#synthDrivers/nvdajp_jtalk.py\r
-# -*- coding: utf-8 -*-\r
-#A part of NonVisual Desktop Access (NVDA)\r
-#Copyright (C) 2006-2010 NVDA Contributors <http://www.nvda-project.org/>\r
-#This file is covered by the GNU General Public License.\r
-#See the file COPYING for more details.\r
-#\r
-# nvdajp_jtalk (based on Open JTalk and libopenjtalk)\r
-# Copyright (C) 2010-2011 Takuya Nishimoto (nishimotz.com)\r
-# Copyright (C) 2013 Masamitsu Misono (043.jp)\r
-\r
-from synthDriverHandler import SynthDriver,VoiceInfo,BooleanSynthSetting\r
-from collections import OrderedDict\r
-from logHandler import log\r
-import speech\r
-import synthDriverHandler\r
-import languageHandler\r
-from jtalk import _nvdajp_jtalk\r
-from jtalk._nvdajp_jtalk import VoiceProperty\r
-\r
class SynthDriver(SynthDriver):
    """A Japanese synth driver for NVDAjp.

    Adapter between NVDA's synth driver interface and the ``_nvdajp_jtalk``
    engine module.  Rate and volume are forwarded to the engine as soon as
    they are set; pitch and inflection are only stored here and are handed
    to the engine with every utterance through a ``VoiceProperty``.
    """
    name = "nvdajp_jtalk"
    description = "JTalk"
    supportedSettings = (
        SynthDriver.VoiceSetting(),
        SynthDriver.RateSetting(),
        BooleanSynthSetting("rateBoost", _("Rate boos&t")),
        SynthDriver.PitchSetting(),
        SynthDriver.InflectionSetting(),
        SynthDriver.VolumeSetting()
    )

    @classmethod
    def check(cls):
        """Report this driver as always available."""
        return True

    def __init__(self):
        """Set the default voice and settings, then start the engine."""
        self.voice_id = 'V2'
        self._volume = 100
        self._pitch = 50
        self._inflection = 50
        self._rateBoost = False
        _nvdajp_jtalk.initialize()
        # The rate setter talks to the engine, so it has to run after
        # initialize().
        self.rate = 50

    def speak(self, speechSequence):
        """Send every text item of a speech sequence to the engine.

        Commands found in the sequence update the state applied to the
        text items that follow them:

        * ``IndexCommand`` - its index is passed along with later text.
        * ``CharacterModeCommand`` - switches character (spelling) mode
          on or off according to ``item.state``.
        * ``LangChangeCommand`` - changes the language; an empty language
          falls back to the default one.

        Any other ``SpeechCommand`` is logged as unsupported and any other
        object is logged as an error.

        @param speechSequence: iterable of strings and speech commands.
        """
        finalIndex = None
        spellState = False
        defaultLanguage = languageHandler.getLanguage()
        # Every Japanese locale variant is collapsed to plain 'ja'.
        if defaultLanguage[:2] == 'ja':
            defaultLanguage = 'ja'
        currentLang = defaultLanguage
        for item in speechSequence:
            if isinstance(item, basestring):
                p = VoiceProperty()
                p.pitch = self._pitch
                p.inflection = self._inflection
                p.characterMode = spellState
                _nvdajp_jtalk.speak(unicode(item), currentLang, index=finalIndex, voiceProperty_=p)
            elif isinstance(item, speech.IndexCommand):
                finalIndex = item.index
            elif isinstance(item, speech.CharacterModeCommand):
                # Follow the requested state.  Both branches used to assign
                # True, so character mode could never be switched off again.
                spellState = bool(item.state)
            elif isinstance(item, speech.LangChangeCommand):
                currentLang = (item.lang or defaultLanguage).replace('_', '-')
                if currentLang[:2] == 'ja':
                    currentLang = 'ja'
            elif isinstance(item, speech.SpeechCommand):
                log.debugWarning("Unsupported speech command: %s"%item)
            else:
                log.error("Unknown speech: %s"%item)

    def cancel(self):
        """Ask the engine to stop speaking."""
        _nvdajp_jtalk.stop()

    def pause(self, switch):
        """Forward a pause request to the engine.

        @param switch: passed to the engine unchanged.
        """
        _nvdajp_jtalk.pause(switch)

    def isSpeaking(self):
        """Return whatever the engine reports as its speaking state."""
        return _nvdajp_jtalk.isSpeaking()

    def _get_rateBoost(self):
        return self._rateBoost

    def _set_rateBoost(self, enable):
        """Toggle rate boost while keeping the same rate setting value."""
        if enable == self._rateBoost:
            return
        # Read the rate under the old boost mode and write it back under the
        # new one, so the engine is reconfigured for the same setting value.
        rate = self.rate
        self._rateBoost = enable
        self.rate = rate

    def terminate(self):
        """Shut the engine down."""
        _nvdajp_jtalk.terminate()

    # The current rate; ranges between 0 and 100
    def _get_rate(self):
        return _nvdajp_jtalk.get_rate(self._rateBoost)

    def _set_rate(self, rate):
        _nvdajp_jtalk.set_rate(int(rate), self._rateBoost)

    def _get_pitch(self):
        return self._pitch

    def _set_pitch(self, pitch):
        # Only stored; handed to the engine per utterance in speak().
        self._pitch = int(pitch)

    def _get_volume(self):
        return self._volume

    def _set_volume(self, volume_):
        self._volume = int(volume_)
        _nvdajp_jtalk.set_volume(self._volume)

    def _get_inflection(self):
        return self._inflection

    def _set_inflection(self, val):
        # Only stored; handed to the engine per utterance in speak().
        self._inflection = int(val)

    def _getAvailableVoices(self):
        """Return an ordered mapping of voice id to ``VoiceInfo``.

        The entries follow the order of ``_nvdajp_jtalk._jtalk_voices``.
        """
        log.debug("_getAvailableVoices called")
        voices = OrderedDict()
        for v in _nvdajp_jtalk._jtalk_voices:
            voices[v['id']] = VoiceInfo(v['id'], v['name'], v['lang'])
        return voices

    def _get_voice(self):
        log.debug("_get_voice called")
        return self.voice_id

    def _set_voice(self, identifier):
        """Switch to the voice with the given id.

        An unknown id, or the id that is already active, leaves the engine
        untouched.  Otherwise the engine is restarted with the new voice and
        the current rate and volume are applied to it again.

        @param identifier: value of the ``'id'`` key of the wanted voice.
        """
        log.debug("_set_voice %s" % (identifier))
        # Read the rate before the engine is torn down.
        rate = _nvdajp_jtalk.get_rate(self._rateBoost)
        for v in _nvdajp_jtalk._jtalk_voices:
            if v['id'] == identifier:
                if self.voice_id != identifier:
                    self.voice_id = identifier
                    _nvdajp_jtalk.terminate()
                    _nvdajp_jtalk.initialize(v)
                    _nvdajp_jtalk.set_rate(rate, self._rateBoost)
                    _nvdajp_jtalk.set_volume(self._volume)
                return

    def _get_lastIndex(self):
        """Return the engine's ``lastIndex`` value, which may be None."""
        return _nvdajp_jtalk.lastIndex