ti31182 work in progress

author Takuya Nishimoto <nishimotz@gmail.com>

Mon, 9 Jun 2014 05:18:46 +0000 (14:18 +0900)

committer Takuya Nishimoto <nishimotz@gmail.com>

Mon, 9 Jun 2014 05:18:46 +0000 (14:18 +0900)
author Takuya Nishimoto <nishimotz@gmail.com>
Mon, 9 Jun 2014 05:18:46 +0000 (14:18 +0900)
committer Takuya Nishimoto <nishimotz@gmail.com>
Mon, 9 Jun 2014 05:18:46 +0000 (14:18 +0900)
diff --git a/jptools/jpBrailleRunner.py b/jptools/jpBrailleRunner.py

index bbffd34..cd999b0 100644 (file)
--- a/jptools/jpBrailleRunner.py
+++ b/jptools/jpBrailleRunner.py
@@ -104,10 +104,13 @@ def pass2(verboseMode=False):
                 f.write("\n")\r
                 count = 0\r
                 for t in tests:\r
+                       nabcc = False\r
+                       if t.has_key('mode') and t['mode'] == 'NABCC':\r
+                               nabcc = True\r
                         if t.has_key('text'):\r
                                 output = cStringIO.StringIO()\r
                                 result, pat, inpos1, inpos2 = translator2.translateWithInPos2(\r
-                                       t['text'], logwrite=__print)\r
+                                       t['text'], logwrite=__print, nabcc=nabcc)\r
                                 log = output.getvalue()\r
                                 output.close()\r
                                 # inpos2\r
diff --git a/jptools/nabccHarness.py b/jptools/nabccHarness.py

index f8b0f47..04afabd 100644 (file)
--- a/jptools/nabccHarness.py
+++ b/jptools/nabccHarness.py
@@ -19,6 +19,11 @@ tests = [
                 },
         {
                 'mode':   'NABCC',
+               'input':  'aアbcdefghijklmnopqrstuvwxyzア',
+               'output': '⠁⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵⠁',
+               },
+       {
+               'mode':   'NABCC',
                 'input':  '1234567890',
                 'output': '⠂⠆⠒⠲⠢⠖⠶⠦⠔⠴',
                 },
@@ -52,4 +57,40 @@ tests = [
                 'input':  '^@#\\|/*',
                 'output': '⡘⡈⠼⡳⠳⠌⠡',
                 },
+       {
+               'mode':   'NABCC',
+               'text': 'ab 123　あab 123　あ',
+               'input': 'ab 123 ア ab 123 ア',
+               'output': '⠁⠃ ⠂⠆⠒ ⠁ ⠁⠃ ⠂⠆⠒ ⠁',
+               },
+       {
+               'mode':   'NABCC',
+               'text': 'ユーザーガイド(U)',
+               'input': 'ユーザー ガイド(U)',
+               'output': '⠬⠒⠐⠱⠒ ⠐⠡⠃⠐⠞⠷⡥⠾',
+               },
+       {
+               'mode':   'NABCC',
+               'text': 'アドレスはnv@nvda.jpです。',
+               'input': 'アドレスワ nv@nvda.jp デス。',
+               'output': '⠁⠐⠞⠛⠹⠄ ⠝⠧⡈⠝⠧⠙⠁⠨⠚⠏ ⠐⠟⠹⠲',
+               },
+       {
+               'mode':   'NABCC',
+               'text': 'C:\\Program Files',
+               'input': 'C:\\Program Files',
+               'output': '⡉⠱⡳⡏⠗⠕⠛⠗⠁⠍ ⡋⠊⠇⠑⠎',
+               },
+       {
+               'mode':   'NABCC',
+               'text': 'C:\\Users\\ユーザー',
+               'input': 'C:\\Users\\ ユーザー',
+               'output': '⡉⠱⡳⡥⠎⠑⠗⠎⡳ ⠬⠒⠐⠱⠒',
+               },
+       {
+               'mode':   'NABCC',
+               'text': '二、三',
+               'input': '2.3',
+               'output': '⠆⠨⠒',
+               },
         ]
diff --git a/source/synthDrivers/jtalk/translator2.py b/source/synthDrivers/jtalk/translator2.py

index c2ac924..02499bf 100644 (file)
--- a/source/synthDrivers/jtalk/translator2.py
+++ b/source/synthDrivers/jtalk/translator2.py
@@ -431,7 +431,7 @@ def fix_japanese_date_morphs(li):
                         new_li.append(li[i])\r
         return new_li\r
  \r
-def should_separate(prev2_mo, prev_mo, mo, next_mo):\r
+def should_separate(prev2_mo, prev_mo, mo, next_mo, nabcc=False):\r
         if mo.hyouki == 'ー': return False\r
         if prev_mo.hyouki == 'ー': return False\r
         if mo.hyouki in 'ぁぃぅぇぉっゃゅょゎァィゥェォッャュョヮヵヶ': return False\r
@@ -544,6 +544,8 @@ def should_separate(prev2_mo, prev_mo, mo, next_mo):
         # を,を,助詞,格助詞,一般,*,ヲ,ヲ,0/1,ヲ,0\r
         if is_alpha(prev_mo.nhyouki) and mo.hinshi1 in ('助詞', '助動詞'):\r
                 return True\r
+       if nabcc and (prev_mo.hinshi2 == 'アルファベット') and mo.hinshi1 in ('助詞', '助動詞'):\r
+               return True\r
  \r
         # ピリオドの後の助詞\r
         if prev_mo.nhyouki.endswith('.') and mo.hinshi1 == '助詞':\r
@@ -777,7 +779,7 @@ def to_dakuon_kana(s):
                 return DAKUON_DIC[s]\r
         return s\r
  \r
-def japanese_braille_separate(inbuf, logwrite):\r
+def japanese_braille_separate(inbuf, logwrite, nabcc=False):\r
         text = inbuf\r
         if RE_MB_ALPHA_NUM_SPACE.match(text):\r
                 outbuf = unicode_normalize(text)\r
@@ -875,7 +877,10 @@ def japanese_braille_separate(inbuf, logwrite):
                 if li[pos-1].output.isdigit() and \\r
                                 li[pos].hyouki in ('、', '・') and \\r
                                 li[pos+1].output.isdigit():\r
-                       li[pos].output = '⠼'\r
+                       if nabcc:\r
+                               li[pos].output = '.'\r
+                       else:\r
+                               li[pos].output = '⠼'\r
  \r
         # before: ａｂ,ab,名詞,一般,*,*,アブ,アブ,1/2,アブ,0\r
         # after:  ａｂ,ab,名詞,一般,*,*,アブ,アブ,1/2,ab,0\r
@@ -955,12 +960,18 @@ def japanese_braille_separate(inbuf, logwrite):
                 # 情報処理点字の開始記号と終了記号\r
                 if RE_INFOMATION.match(mo.nhyouki) and \\r
                                 ('@' in mo.nhyouki) or ('://' in mo.nhyouki) or ('\\' in mo.nhyouki):\r
-                       mo.output = '⠠⠦' + mo.nhyouki + '⠠⠴'\r
+                       if nabcc:\r
+                               mo.output = mo.nhyouki\r
+                       else:\r
+                               mo.output = '⠠⠦' + mo.nhyouki + '⠠⠴'\r
                 # 外国語引用符\r
                 # 空白をはさまない1単語は外国語引用符ではなく外字符で\r
                 elif RE_GAIJI.match(mo.nhyouki) and \\r
                                 (' ' in mo.nhyouki) or ('.' in mo.nhyouki and len(mo.nhyouki) > 3):\r
-                       mo.output = '⠦' + mo.nhyouki + '⠴'\r
+                       if nabcc:\r
+                               mo.output = mo.nhyouki\r
+                       else:\r
+                               mo.output = '⠦' + mo.nhyouki + '⠴'\r
  \r
         for mo in li:\r
                 # 情報処理点字でも外国語引用符でもなく output が & を含む場合は前後をあける\r
@@ -986,7 +997,7 @@ def japanese_braille_separate(inbuf, logwrite):
                 prev2_mo = li[i-2] if i-2 >= 0 else None\r
                 prev_mo = li[i-1]\r
                 next_mo = li[i+1] if i+1 < len(li) else None\r
-               li[i-1].sepflag = should_separate(prev2_mo, prev_mo, li[i], next_mo)\r
+               li[i-1].sepflag = should_separate(prev2_mo, prev_mo, li[i], next_mo, nabcc=nabcc)\r
  \r
         for mo in li:\r
                 mo.write(logwrite)\r
@@ -1012,11 +1023,11 @@ def terminate():
         global mecab_initialized\r
         mecab_initialized = False\r
  \r
-def translateWithInPos2(inbuf, logwrite=_logwrite):\r
+def translateWithInPos2(inbuf, logwrite=_logwrite, nabcc=False):\r
         if not mecab_initialized:\r
                 initialize()\r
-       outbuf, inpos2 = japanese_braille_separate(inbuf, logwrite)\r
-       result, inpos1 = translator1.translateWithInPos(outbuf)\r
+       outbuf, inpos2 = japanese_braille_separate(inbuf, logwrite, nabcc=nabcc)\r
+       result, inpos1 = translator1.translateWithInPos(outbuf, nabcc=nabcc)\r
         result = result.replace('□', ' ')\r
         return (outbuf, result, inpos1, inpos2)\r
  \r
author	Takuya Nishimoto <nishimotz@gmail.com>
	Mon, 9 Jun 2014 05:18:46 +0000 (14:18 +0900)
committer	Takuya Nishimoto <nishimotz@gmail.com>
	Mon, 9 Jun 2014 05:18:46 +0000 (14:18 +0900)
jptools/jpBrailleRunner.py		patch | blob | history
jptools/nabccHarness.py		patch | blob | history
source/synthDrivers/jtalk/translator2.py		patch | blob | history