OSDN Git Service

ti31182 work in progress
author Takuya Nishimoto <nishimotz@gmail.com>
Mon, 9 Jun 2014 05:18:46 +0000 (14:18 +0900)
committer Takuya Nishimoto <nishimotz@gmail.com>
Mon, 9 Jun 2014 05:18:46 +0000 (14:18 +0900)
jptools/jpBrailleRunner.py
jptools/nabccHarness.py
source/synthDrivers/jtalk/translator2.py

index bbffd34..cd999b0 100644 (file)
@@ -104,10 +104,13 @@ def pass2(verboseMode=False):
                f.write("\n")\r
                count = 0\r
                for t in tests:\r
+                       nabcc = False\r
+                       if t.has_key('mode') and t['mode'] == 'NABCC':\r
+                               nabcc = True\r
                        if t.has_key('text'):\r
                                output = cStringIO.StringIO()\r
                                result, pat, inpos1, inpos2 = translator2.translateWithInPos2(\r
-                                       t['text'], logwrite=__print)\r
+                                       t['text'], logwrite=__print, nabcc=nabcc)\r
                                log = output.getvalue()\r
                                output.close()\r
                                # inpos2\r
index f8b0f47..04afabd 100644 (file)
@@ -19,6 +19,11 @@ tests = [
                },
        {
                'mode':   'NABCC',
+               'input':  'aアbcdefghijklmnopqrstuvwxyzア',
+               'output': '⠁⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚⠅⠇⠍⠝⠕⠏⠟⠗⠎⠞⠥⠧⠺⠭⠽⠵⠁',
+               },
+       {
+               'mode':   'NABCC',
                'input':  '1234567890',
                'output': '⠂⠆⠒⠲⠢⠖⠶⠦⠔⠴',
                },
@@ -52,4 +57,40 @@ tests = [
                'input':  '^@#\\|/*',
                'output': '⡘⡈⠼⡳⠳⠌⠡',
                },
+       {
+               'mode':   'NABCC',
+               'text': 'ab 123 あab 123 あ',
+               'input': 'ab 123 ア ab 123 ア',
+               'output': '⠁⠃ ⠂⠆⠒ ⠁ ⠁⠃ ⠂⠆⠒ ⠁',
+               },
+       {
+               'mode':   'NABCC',
+               'text': 'ユーザーガイド(U)',
+               'input': 'ユーザー ガイド(U)',
+               'output': '⠬⠒⠐⠱⠒ ⠐⠡⠃⠐⠞⠷⡥⠾',
+               },
+       {
+               'mode':   'NABCC',
+               'text': 'アドレスはnv@nvda.jpです。',
+               'input': 'アドレスワ nv@nvda.jp デス。',
+               'output': '⠁⠐⠞⠛⠹⠄ ⠝⠧⡈⠝⠧⠙⠁⠨⠚⠏ ⠐⠟⠹⠲',
+               },
+       {
+               'mode':   'NABCC',
+               'text': 'C:\\Program Files',
+               'input': 'C:\\Program Files',
+               'output': '⡉⠱⡳⡏⠗⠕⠛⠗⠁⠍ ⡋⠊⠇⠑⠎',
+               },
+       {
+               'mode':   'NABCC',
+               'text': 'C:\\Users\\ユーザー',
+               'input': 'C:\\Users\\ ユーザー',
+               'output': '⡉⠱⡳⡥⠎⠑⠗⠎⡳ ⠬⠒⠐⠱⠒',
+               },
+       {
+               'mode':   'NABCC',
+               'text': '二、三',
+               'input': '2.3',
+               'output': '⠆⠨⠒',
+               },
        ]
index c2ac924..02499bf 100644 (file)
@@ -431,7 +431,7 @@ def fix_japanese_date_morphs(li):
                        new_li.append(li[i])\r
        return new_li\r
 \r
-def should_separate(prev2_mo, prev_mo, mo, next_mo):\r
+def should_separate(prev2_mo, prev_mo, mo, next_mo, nabcc=False):\r
        if mo.hyouki == 'ー': return False\r
        if prev_mo.hyouki == 'ー': return False\r
        if mo.hyouki in 'ぁぃぅぇぉっゃゅょゎァィゥェォッャュョヮヵヶ': return False\r
@@ -544,6 +544,8 @@ def should_separate(prev2_mo, prev_mo, mo, next_mo):
        # を,を,助詞,格助詞,一般,*,ヲ,ヲ,0/1,ヲ,0\r
        if is_alpha(prev_mo.nhyouki) and mo.hinshi1 in ('助詞', '助動詞'):\r
                return True\r
+       if nabcc and (prev_mo.hinshi2 == 'アルファベット') and mo.hinshi1 in ('助詞', '助動詞'):\r
+               return True\r
 \r
        # ピリオドの後の助詞\r
        if prev_mo.nhyouki.endswith('.') and mo.hinshi1 == '助詞':\r
@@ -777,7 +779,7 @@ def to_dakuon_kana(s):
                return DAKUON_DIC[s]\r
        return s\r
 \r
-def japanese_braille_separate(inbuf, logwrite):\r
+def japanese_braille_separate(inbuf, logwrite, nabcc=False):\r
        text = inbuf\r
        if RE_MB_ALPHA_NUM_SPACE.match(text):\r
                outbuf = unicode_normalize(text)\r
@@ -875,7 +877,10 @@ def japanese_braille_separate(inbuf, logwrite):
                if li[pos-1].output.isdigit() and \\r
                                li[pos].hyouki in ('、', '・') and \\r
                                li[pos+1].output.isdigit():\r
-                       li[pos].output = '⠼'\r
+                       if nabcc:\r
+                               li[pos].output = '.'\r
+                       else:\r
+                               li[pos].output = '⠼'\r
 \r
        # before: ab,ab,名詞,一般,*,*,アブ,アブ,1/2,アブ,0\r
        # after:  ab,ab,名詞,一般,*,*,アブ,アブ,1/2,ab,0\r
@@ -955,12 +960,18 @@ def japanese_braille_separate(inbuf, logwrite):
                # 情報処理点字の開始記号と終了記号\r
                if RE_INFOMATION.match(mo.nhyouki) and \\r
                                ('@' in mo.nhyouki) or ('://' in mo.nhyouki) or ('\\' in mo.nhyouki):\r
-                       mo.output = '⠠⠦' + mo.nhyouki + '⠠⠴'\r
+                       if nabcc:\r
+                               mo.output = mo.nhyouki\r
+                       else:\r
+                               mo.output = '⠠⠦' + mo.nhyouki + '⠠⠴'\r
                # 外国語引用符\r
                # 空白をはさまない1単語は外国語引用符ではなく外字符で\r
                elif RE_GAIJI.match(mo.nhyouki) and \\r
                                (' ' in mo.nhyouki) or ('.' in mo.nhyouki and len(mo.nhyouki) > 3):\r
-                       mo.output = '⠦' + mo.nhyouki + '⠴'\r
+                       if nabcc:\r
+                               mo.output = mo.nhyouki\r
+                       else:\r
+                               mo.output = '⠦' + mo.nhyouki + '⠴'\r
 \r
        for mo in li:\r
                # 情報処理点字でも外国語引用符でもなく output が & を含む場合は前後をあける\r
@@ -986,7 +997,7 @@ def japanese_braille_separate(inbuf, logwrite):
                prev2_mo = li[i-2] if i-2 >= 0 else None\r
                prev_mo = li[i-1]\r
                next_mo = li[i+1] if i+1 < len(li) else None\r
-               li[i-1].sepflag = should_separate(prev2_mo, prev_mo, li[i], next_mo)\r
+               li[i-1].sepflag = should_separate(prev2_mo, prev_mo, li[i], next_mo, nabcc=nabcc)\r
 \r
        for mo in li:\r
                mo.write(logwrite)\r
@@ -1012,11 +1023,11 @@ def terminate():
        global mecab_initialized\r
        mecab_initialized = False\r
 \r
-def translateWithInPos2(inbuf, logwrite=_logwrite):\r
+def translateWithInPos2(inbuf, logwrite=_logwrite, nabcc=False):\r
        if not mecab_initialized:\r
                initialize()\r
-       outbuf, inpos2 = japanese_braille_separate(inbuf, logwrite)\r
-       result, inpos1 = translator1.translateWithInPos(outbuf)\r
+       outbuf, inpos2 = japanese_braille_separate(inbuf, logwrite, nabcc=nabcc)\r
+       result, inpos1 = translator1.translateWithInPos(outbuf, nabcc=nabcc)\r
        result = result.replace('□', ' ')\r
        return (outbuf, result, inpos1, inpos2)\r
 \r