OSDN Git Service

work around ti34107
author Takuya Nishimoto <nishimotz@gmail.com>
Mon, 4 Aug 2014 06:26:08 +0000 (15:26 +0900)
committer Takuya Nishimoto <nishimotz@gmail.com>
Mon, 4 Aug 2014 06:26:08 +0000 (15:26 +0900)
jptools/harness.py
source/synthDrivers/jtalk/translator2.py

index 69479dc..9ea7722 100644 (file)
@@ -2296,6 +2296,27 @@ tests = [
                'input': '2ヒャク 5⠼60',\r
                'output': '⠼⠃⠈⠥⠩ ⠼⠑⠼⠋⠚',\r
                },\r
+       {\r
+               'text': '二一',\r
+               'input': '21',\r
+               'output': '⠼⠃⠁',\r
+               },\r
+       {\r
+               'text': '二十一',\r
+               'input': '21',\r
+               'output': '⠼⠃⠁',\r
+               },\r
+       {\r
+               'text': '二十一二',\r
+               'input': '21⠼2',\r
+               'output': '⠼⠃⠁⠼⠃',\r
+               },\r
+       {\r
+               'text': '二十二三',\r
+               'input': '22⠼3',\r
+               'output': '⠼⠃⠃⠼⠉',\r
+               },\r
+\r
        { 'note': '7.特に必要があればそのまま数字を並べて書くことができる。その場合は3桁ごとに位取り点(⠄)を用いて書いてよい。' },\r
        {\r
                'text': '53,000',\r
index a72dbf9..41a68eb 100644 (file)
@@ -200,13 +200,15 @@ def replace_morphs(li, dic):
 RE_KANSUJI = re.compile('^[一二三四五六七八九〇零十拾百千壱二参]+$')\r
 \r
 # http://programminblog.blogspot.jp/2010/11/python.html\r
-def kansuji2arabic(text):\r
+def kansuji2arabic(text, logwrite=None):\r
        if not RE_KANSUJI.match(text):\r
-               return None\r
+               return (0, None) # 漢数字ではない場合\r
        result = 0\r
+       prevDigit = 0\r
        digit = 1\r
        numgroup = 1\r
        kanindex = len(text)\r
+       if logwrite: logwrite('kansuji2arabic: ' + text)\r
        while kanindex > 0:\r
                c = text[(kanindex - 1):kanindex]\r
                c1 = text[kanindex:(kanindex + 1)]\r
@@ -244,21 +246,33 @@ def kansuji2arabic(text):
                        elif c in '九':\r
                                result += 9 * digit * numgroup\r
                        digit *= 10\r
+               if logwrite: logwrite('kansuji2arabic c(%s) c1(%s) kanindex(%d) prevDigit(%d) digit(%d) result(%d) numgroup(%d)' % (c, c1, kanindex, prevDigit, digit, result, numgroup))\r
+               if prevDigit > digit:\r
+                       return (2, None) # およその数で数が重なる場合\r
+               prevDigit = digit\r
        if (digit == 10 and text[:1] in '十拾') or \\r
                        (digit == 100 and text[:1] in '百') or \\r
                        (digit == 1000 and text[:1] in '千'):\r
                result += digit * numgroup\r
        text = '%d' % result\r
-       return text\r
+       return (1, text) # 漢数字の場合\r
 \r
-def rewrite_number(li):\r
+def rewrite_number(li, logwrite=None):\r
        new_li = []\r
        for mo in li:\r
                m = copy.deepcopy(mo)\r
                if m.hinshi2 != '固有名詞':\r
-                       ret = kansuji2arabic(m.hyouki)\r
-                       if ret:\r
-                               m.output = ret\r
+                       flag, num = kansuji2arabic(m.hyouki, logwrite)\r
+                       if flag == 1:\r
+                               m.output = str(num)\r
+                       elif flag == 2 and len(m.hyouki) >= 2:\r
+                               # 「二十二三」のような場合「二十二」「三」に分割\r
+                               h1 = m.hyouki[:-1]\r
+                               flag1, num1 = kansuji2arabic(h1, logwrite)\r
+                               h2 = m.hyouki[-1:]\r
+                               flag2, num2 = kansuji2arabic(h2, logwrite)\r
+                               if flag1 == 1 and flag2 == 1:\r
+                                       m.output = str(num1) + '⠼' + str(num2)\r
                new_li.append(m)\r
        return new_li\r
 \r
@@ -826,7 +840,7 @@ def japanese_braille_separate(inbuf, logwrite, nabcc=False):
 \r
        li = replace_morphs(li, CONNECTED_MORPHS)\r
        li = replace_digit_morphs(li)\r
-       li = rewrite_number(li)\r
+       li = rewrite_number(li, logwrite)\r
 \r
        # before: う,う,助動詞,*,*,*,ウ,ウ,0/1,ウ,0\r
        # after:  う,う,助動詞,*,*,*,ウ,ウ,0/1,ー,0\r