OSDN Git Service

stop using trunk or dist directory in rec10 project.
[rec10/rec10-git.git] / dist / trunk / rec10 / n_gram.py
diff --git a/dist/trunk/rec10/n_gram.py b/dist/trunk/rec10/n_gram.py
deleted file mode 100755 (executable)
index f24a097..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/python
-# coding: UTF-8
-# Rec10 TS Recording Tools
-# Copyright (C) 2009-2011 Yukikaze
-
-import zenhan
-def bigram(str1, str2):
-    str1 = zenhan.toHankaku(str1)
-    str2 = zenhan.toHankaku(str2)
-    str1 = str1.replace(" ", "")
-    str2 = str2.replace(" ", "")
-    """
-    bigramによる単語の近さを計算します。
-    """
-    gram = []
-    if len(str1) < 2:
-        gram.append(str1)
-    else:
-        tmp = str1[0]
-        for x in str1[1:]:
-            gram.append(tmp + x)
-            tmp = x
-    point = 0
-    for x in gram:
-        i = findGram(x, str2)
-        if i > 0:
-            i = 90 + 10 * i
-        else:
-            i = 0
-        point = point + i
-    return point
-def trigram(str1, str2):
-    """
-    trigramによる単語の近さを計算します。
-    単語文字列の長さも考慮にいれます。
-    """
-    str1 = zenhan.toHankaku(str1)
-    str2 = zenhan.toHankaku(str2)
-    str1 = str1.replace(" ", "")
-    str2 = str2.replace(" ", "")
-    gram = []
-    if len(str1) < 3:
-        gram.append(str1)
-    else:
-        tmp1 = str1[0]
-        tmp2 = str1[1]
-        for x in str1[2:]:
-            gram.append(tmp1 + tmp2 + x)
-            tmp1 = tmp2
-            tmp2 = x
-    point = 0
-    count = 0
-    for x in gram:
-        i = findGram(x, str2)
-        if i > 0:
-            i = 90 + 10 * i
-            count=count + 1
-        else:
-            i = 0
-        point = point + i
-    point = point + 20*count*count
-    point = point / len(str1) * 10
-    point = point / len(str2) * 10
-    return point
-def findGram(gram, s):
-    """
-    s中にあらわれるgramの数を調べる
-    """
-    st = s
-    i = 0
-    while st.find(gram) != -1:
-        i = i + 1
-        st = st[st.find(gram) + 1:]
-    return i
-def getNounQuadGram(s):
-    """
-    。
-    """
-    str1 = zenhan.toHankaku(s)
-    gram={}
-    if len(str1) > 4:
-        tmp1 = str1[0]
-        tmp2 = str1[1]
-        tmp3 = str1[2]
-        skipnum=0
-        for x in str1[3:]:
-            if skipnum>0:
-                skipnum=skipnum-1
-            else:
-                tmps=tmp1 + tmp2 + tmp3 + x
-                if tmps.find(" ")<0:
-                    if zenhan.checkCharacterType(tmp1)>0:
-                        if (zenhan.checkCharacterType(tmp1)==zenhan.checkCharacterType(tmp2)):
-                            if (zenhan.checkCharacterType(tmp2)==zenhan.checkCharacterType(tmp3)):
-                                if (zenhan.checkCharacterType(tmp3)==zenhan.checkCharacterType(x)):
-                                    gram[tmps]=gram.get(tmps,0)+1
-                                    skipnum=0
-                                else:
-                                    skipnum=3
-                            else:
-                                if (zenhan.checkCharacterType(tmp3)==zenhan.checkCharacterType(x)):
-                                    skipnum=2
-                                else:
-                                    skipnum=3
-                        else:
-                            if (zenhan.checkCharacterType(tmp2)==zenhan.checkCharacterType(tmp3)):
-                                if (zenhan.checkCharacterType(tmp3)==zenhan.checkCharacterType(x)):
-                                    skipnum=0
-                                else:
-                                    skipnum=3
-                            else:
-                                if (zenhan.checkCharacterType(tmp3)==zenhan.checkCharacterType(x)):
-                                    skipnum=2
-                                else:
-                                    skipnum=3
-            tmp1 = tmp2
-            tmp2 = tmp3
-            tmp3 = x
-    return gram
-""
\ No newline at end of file