OSDN Git Service

stop using trunk directory in rectool
[rec10/rec10-git.git] / rec10 / branches / 0.5 / src / n_gram.py
1 #!/usr/bin/python
2 # coding: UTF-8
3 # Rec10 TS Recording Tools
4 # Copyright (C) 2009 Yukikaze
5 import zenhan
def bigram(str1,str2):
    """
    Score how close str2 is to str1 using character bigrams.

    Both arguments are passed through zenhan.toHankaku (presumably a
    full-width to half-width conversion -- confirm against zenhan) and
    then have every ASCII space removed.

    str1 is split into its overlapping two-character grams; a str1 of a
    single character is used as one gram.  Each gram that occurs n > 0
    times in str2 (overlapping occurrences included) adds 90 + 10 * n to
    the score.  A gram that appears several times in str1 is scored once
    per appearance.

    Returns the total score as an int.  The score is 0 when no gram
    matches or when str1 is empty after normalization.
    """
    str1 = zenhan.toHankaku(str1)
    str2 = zenhan.toHankaku(str2)
    str1 = str1.replace(" ", "")
    str2 = str2.replace(" ", "")
    if not str1:
        # Nothing to compare: an empty str1 has no grams, so the score is 0.
        return 0
    if len(str1) < 2:
        grams = [str1]
    else:
        # Sliding window of width 2 over str1.
        grams = [str1[i:i + 2] for i in range(len(str1) - 1)]
    point = 0
    for gram in grams:
        count = find_gram(gram, str2)
        if count > 0:
            # 100 points for the first hit, 10 more for each further hit.
            point += 90 + 10 * count
    return point
def find_gram(gram,s):
    """
    Count how many times gram occurs in s.

    Overlapping occurrences are counted separately, e.g. "aa" occurs
    three times in "aaaa".  An empty gram is treated as matching nothing
    and yields 0.

    Returns the number of occurrences as an int.
    """
    if not gram:
        # "" is found at every position, including in an empty string, so
        # searching for it would never run out of matches.
        return 0
    count = 0
    pos = s.find(gram)
    while pos != -1:
        count += 1
        # Resume one character after the last hit so overlaps are counted.
        pos = s.find(gram, pos + 1)
    return count