OSDN Git Service

Implement interlaced encoding (beta).
[rec10/rec10-git.git] / rec10 / trunk / src / zenhan.py
index 9a1e702..876327b 100644 (file)
@@ -1,14 +1,19 @@
 #!/usr/bin/python
 # coding: UTF-8
 # Rec10 TS Recording Tools
-# Copyright (C) 2009 Yukikaze
+# Copyright (C) 2009-2010 Yukikaze
 
+import recdblist
 global z_ascii
 global h_ascii
 global z_number
 global h_number
-z_ascii = u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !”#$%&’()*+,−./:;<=>?@[¥]^_‘{|}〜 "
-h_ascii = u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ "
+z_ascii = u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !”#$%&’()*+,−./:;<=>?@[¥]^_‘{|}〜 〜"
+h_ascii = u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !\"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ -"
+#z_ascii = z_ascii+Ur"\Ue28892\Uefbc8e"
+#h_ascii = h_ascii+Ur"\Uefbc8d."
+z_ascii_sp = unichr(0x2212)+unichr(0xff0e)
+h_ascii_sp = unichr(0x002d)+unichr(0x002e)
 z_number = u"0123456789"
 h_number = u"0123456789"
 z_alphabet = u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz #"
@@ -19,6 +24,9 @@ def toHankaku(str):
         i = z_ascii.find(s)
         if (i != -1):
             s = h_ascii[i]
+        i = z_ascii_sp.find(s)
+        if (i != -1):
+            s = h_ascii_sp[i]
         i = z_number.find(s)
         if (i != -1):
             s = h_number[i]
@@ -30,6 +38,9 @@ def toHankaku_ABC123(str):
         i = z_alphabet.find(s)
         if (i != -1):
             s = h_alphabet[i]
+        i = z_ascii_sp.find(s)
+        if (i != -1):
+            s = h_ascii_sp[i]
         i = z_number.find(s)
         if (i != -1):
             s = h_number[i]
@@ -39,13 +50,13 @@ def check_Character_Type(character):
     """
     return code is 1:Alphabet 2:Hiragana 3:Katakana 4:Kanji
     """
-    #print character
-    #print type(character)
+    #recdblist.printutf8(character)
+    #recdblist.printutf8(type(character))
     #character=character.encode('UTF-8')
-    #print character
-    #print type(character)
+    #recdblist.printutf8(character)
+    #recdblist.printutf8(type(character))
     chcode=ord(character)
-    #print ord(chcode)
+    #recdblist.printutf8(ord(chcode))
     if chcode>=0x0000 and chcode<=0x007F:
         return 1
     elif chcode>=0x3040 and chcode<=0x309F:
@@ -53,4 +64,4 @@ def check_Character_Type(character):
     elif chcode>=0x30A0 and chcode<=0x30FF:
         return 3
     elif chcode>=0x4E00 and chcode<=0x9FFF:
-        return 4
\ No newline at end of file
+        return 4