#!/usr/bin/python
# coding: UTF-8
# Rec10 TS Recording Tools
-# Copyright (C) 2009 Yukikaze
+# Copyright (C) 2009-2011 Yukikaze
import zenhan
# bigram(str1, str2): appears to add up a per-item value `i` into `point`
# while iterating over `gram`, and returns `point`.
# NOTE(review): this region reads like a rendered diff ("-" = removed line,
# "+" = added line) with indentation stripped and some unchanged lines
# missing -- `x` and `gram` are used below without a visible definition.
# Confirm against the complete file before changing any logic.
def bigram(str1, str2):
tmp = x
point = 0
for x in gram:
# The only change recorded in this function is the helper rename
# find_gram -> findGram; the arguments (x, str2) are the same on both lines.
- i = find_gram(x, str2)
+ i = findGram(x, str2)
if i > 0:
# A positive result is rescaled to 90 + 10 * i.
i = 90 + 10 * i
else:
# NOTE(review): without indentation it is unclear what belongs to this
# `else`; presumably a line is missing before the accumulation -- verify.
point = point + i
return point
# trigram(str1, str2): the added docstring below reads, in English:
#   "Computes the closeness of words using trigrams.
#    The length of the word strings is also taken into account."
# NOTE(review): this region reads like a rendered diff ("-" = removed line,
# "+" = added line) with indentation stripped and some unchanged lines
# missing (e.g. the branch that fills `gram` for strings of length >= 3).
# Confirm against the complete file before changing any logic.
def trigram(str1, str2):
+ """
+ trigramによる単語の近さを計算します。
+ 単語文字列の長さも考慮にいれます。
+ """
# Both inputs go through zenhan.toHankaku (presumably full-width to
# half-width conversion -- confirm in zenhan), then every " " is removed.
str1 = zenhan.toHankaku(str1)
str2 = zenhan.toHankaku(str2)
str1 = str1.replace(" ", "")
str2 = str2.replace(" ", "")
# Removed text, in English: "Computes the closeness of words using trigrams."
# In the old position (after the statements above) this string was a bare
# expression, not the function's docstring; the change moves it to the top.
- """
- trigramによる単語の近さを計算します。
- """
gram = []
# A str1 shorter than three characters is appended to gram as a whole.
if len(str1) < 3:
gram.append(str1)
point = 0
count = 0
for x in gram:
# Helper rename find_gram -> findGram; arguments are unchanged.
- i = find_gram(x, str2)
+ i = findGram(x, str2)
if i > 0:
# A positive result is rescaled to 90 + 10 * i.
i = 90 + 10 * i
count=count + 1
# The score is divided by the length of each (space-stripped) string and
# multiplied by 10 each time.
# NOTE(review): if str1 or str2 is empty at this point, len(...) is 0 and the
# division raises ZeroDivisionError, unless guarded by lines not shown here.
point = point / len(str1) * 10
point = point / len(str2) * 10
return point
# NOTE(review): this region reads like a rendered diff ("-" = removed line,
# "+" = added line) with indentation stripped and some unchanged lines
# missing. The recorded change is the rename find_gram -> findGram; the
# parameter list (gram, s) is the same on both lines.
-def find_gram(gram, s):
+def findGram(gram, s):
"""
s中にあらわれるgramの数を調べる
"""
# Docstring above, in English: "Count how many times gram appears in s."
# NOTE(review): the initialisation of `i` and `st`, and whatever loop or
# condition encloses the next two lines, are not visible here.
i = i + 1
# Keep only the part of st that starts one character after the index
# returned by st.find(gram).
# NOTE(review): find() returns -1 when gram is absent, which would leave st
# unchanged (st[0:]); presumably a condition not shown prevents that -- verify.
st = st[st.find(gram) + 1:]
return i
# NOTE(review): this region reads like a rendered diff ("-" = removed line,
# "+" = added line) with indentation stripped and some unchanged lines
# missing, so the nesting of the if/else chain below cannot be confirmed
# from this text. Every change recorded in this block is a rename:
#   get_noun_quad_gram          -> getNounQuadGram
#   zenhan.check_Character_Type -> zenhan.checkCharacterType
# The arguments of every renamed call are unchanged.
-def get_noun_quad_gram(s):
+def getNounQuadGram(s):
"""
。
"""
# The docstring above contains only an ideographic full stop; no description
# of the function is visible in this text.
else:
# tmps concatenates tmp1, tmp2, tmp3 and x (presumably four consecutive
# characters of s -- confirm against the missing lines).
tmps=tmp1 + tmp2 + tmp3 + x
# Only a tmps that contains no " " is examined further.
if tmps.find(" ")<0:
- if zenhan.check_Character_Type(tmp1)>0:
- if (zenhan.check_Character_Type(tmp1)==zenhan.check_Character_Type(tmp2)):
- if (zenhan.check_Character_Type(tmp2)==zenhan.check_Character_Type(tmp3)):
- if (zenhan.check_Character_Type(tmp3)==zenhan.check_Character_Type(x)):
# The checks compare the character type reported by zenhan for neighbouring
# pieces (tmp1/tmp2, tmp2/tmp3, tmp3/x), after requiring tmp1's type to be > 0.
+ if zenhan.checkCharacterType(tmp1)>0:
+ if (zenhan.checkCharacterType(tmp1)==zenhan.checkCharacterType(tmp2)):
+ if (zenhan.checkCharacterType(tmp2)==zenhan.checkCharacterType(tmp3)):
+ if (zenhan.checkCharacterType(tmp3)==zenhan.checkCharacterType(x)):
# Increment the occurrence count stored under tmps (0 when not yet present).
gram[tmps]=gram.get(tmps,0)+1
skipnum=0
else:
skipnum=3
else:
- if (zenhan.check_Character_Type(tmp3)==zenhan.check_Character_Type(x)):
+ if (zenhan.checkCharacterType(tmp3)==zenhan.checkCharacterType(x)):
skipnum=2
else:
skipnum=3
else:
- if (zenhan.check_Character_Type(tmp2)==zenhan.check_Character_Type(tmp3)):
- if (zenhan.check_Character_Type(tmp3)==zenhan.check_Character_Type(x)):
+ if (zenhan.checkCharacterType(tmp2)==zenhan.checkCharacterType(tmp3)):
+ if (zenhan.checkCharacterType(tmp3)==zenhan.checkCharacterType(x)):
skipnum=0
else:
skipnum=3
else:
- if (zenhan.check_Character_Type(tmp3)==zenhan.check_Character_Type(x)):
+ if (zenhan.checkCharacterType(tmp3)==zenhan.checkCharacterType(x)):
skipnum=2
else:
# skipnum is set to 0, 2 or 3 by the branches above; how it is consumed is
# not visible in this text.
skipnum=3