// Constructor for the word-recognition component.
// env: the only argument. The prototype methods read this.env, but the
// assignment is not among the visible lines.
// NOTE(review): presumably stored as this.env on a line not shown here - confirm.
1 function AI_WordRecognition(env){
// Cache read and rebuilt by splitByWord; null means it has not been built yet.
3 this.wordListCache = null;
// Timestamp that splitByWord compares against env.memory.wordListLastModifiedDate
// to decide whether the cache is stale.
4 this.wordListCacheLastModifiedDate = new Date();
6 AI_WordRecognition.prototype = {
// Collects candidate word tags by comparing suffixes of `input` with
// suffixes of history strings, runs the local list through filters
// 00, 01 (minLen 2) and 03, and hands it to mergeCandidateWordList.
// input: used as a string (.length, .substr).
// NOTE(review): hStrBase and cLen are used below but not declared in the
// visible lines - presumably derived from h[j] and a running match length;
// confirm against the full file.
7 slideLookUpCandidateWordByHistory: function(input){
8 var h = this.env.input.historyList;
// Local candidate list: filled in the loops, then filtered, then merged.
9 var cList = new Array();
10 for(var i = 0, iLen = input.length; i < iLen; i++){
11 //input character loop
// iStr is the suffix of input starting at index i.
12 var iStr = input.substr(i);
// Iterate over every entry of the history list.
15 for(var j = 0, jLen = h.length; j < jLen; j++){
18 for(var k = 0, kLen = hStrBase.length; k < kLen; k++){
19 //history character loop
// hStr is the suffix of hStrBase starting at index k.
20 var hStr = hStrBase.substr(k);
// compareLeftHand is defined elsewhere; presumably it returns the length of
// the common leading part of hStr and iStr - TODO confirm.
21 var m = hStr.compareLeftHand(iStr);
// Proceed only when m exceeds cLen and is not the full length of iStr.
22 if(m > cLen && m != iStr.length){
// Adds the trimmed first cLen characters of iStr as a candidate tag and
// increments wordCount on whatever pushUnique returns (presumably the tag
// stored in cList - confirm).
28 cList.pushUnique(new AI_CandidateWordTag(iStr.substr(0, cLen).trim())).wordCount++;
// Filters run in this fixed order; filterCandidateWordList02 is not used here.
32 this.filterCandidateWordList00(cList);
33 this.filterCandidateWordList01(cList, 2);
34 this.filterCandidateWordList03(cList);
// Merge the surviving candidates into env.memory.
36 this.mergeCandidateWordList(cList);
// Adds a candidate word tag to the memory's candidate word list.
// strTag: tag object with a .str property.
// NOTE(review): the handling of the lookup result `s` (e.g. what happens
// when a tag with the same str already exists) is not visible in these
// lines - confirm against the full file.
39 appendCandidateWordList: function(strTag){
// Look for an existing candidate whose str equals strTag.str.
40 var s = this.env.memory.candidateWordList.isIncluded(strTag, function(a, b){ return (a.str == b.str); });
45 this.env.memory.appendMemoryTag(strTag);
// Looks up a candidate word by its string.
// str: the string compared (with ==) against each candidate's .str.
// Returns whatever candidateWordList.isIncluded returns for that match
// (presumably the matching tag, or a falsy value when absent - confirm).
48 getCandidateWordTagByString: function(str){
49 return this.env.memory.candidateWordList.isIncluded(str, function(a, b){ return (a.str == b); });
// Passes every element of strTagList, in index order, to
// appendCandidateWordList.
// strTagList: array-like list of candidate word tags.
51 mergeCandidateWordList: function(strTagList){
52 for(var i = 0, iLen = strTagList.length; i < iLen; i++){
53 this.appendCandidateWordList(strTagList[i]);
// Prunes env.memory.candidateWordList: entries with wordCount below 10 or
// wordLevel below 1 are logged through env.debug and removed with
// removeMemoryTagByObject. Afterwards the clean-up time is recorded in
// env.memory.candidateWordListLastCleanedDate.
// NOTE(review): how i / iLen are adjusted after a removal is not visible in
// these lines - confirm the loop does not skip or overrun entries.
56 cleanCandidateWordList: function(){
// Length is captured once, before any removal.
60 var iLen = this.env.memory.candidateWordList.length;
61 for(var i = 0; i < iLen; i++){
// First criterion: occurrence count below the fixed threshold of 10.
62 if(this.env.memory.candidateWordList[i].wordCount < 10){
63 this.env.debug("Too small wordCount of candidateWord [" + this.env.memory.candidateWordList[i].str + "]. Removed.\n");
64 this.env.memory.removeMemoryTagByObject(this.env.memory.candidateWordList[i]);
// Second criterion: word level below 1.
68 if(this.env.memory.candidateWordList[i].wordLevel < 1){
69 this.env.debug("Too small wordLevel of candidateWord [" + this.env.memory.candidateWordList[i].str + "]. Removed.\n");
70 this.env.memory.removeMemoryTagByObject(this.env.memory.candidateWordList[i]);
// Remember when the list was last cleaned.
75 this.env.memory.candidateWordListLastCleanedDate = new Date();
// Dumps the candidate word list through env.debug: a header with the entry
// count, one line per entry (1-based index, wordCount, wordLevel, str), and
// a closing marker.
77 debugShowCandidateWordList: function(){
// Work on the result of copy() rather than on the memory's own list.
78 var c = this.env.memory.candidateWordList.copy();
80 this.env.debug("candidateWordList:" + c.length + "\n #:wCount:level:str\n");
82 for(var i = 0, iLen = c.length; i < iLen; i++){
83 this.env.debug((i + 1) + ":\t" + c[i].wordCount.toString() + ":\t" + c[i].wordLevel.toString() + ":\t" + c[i].str + "\n");
85 this.env.debug("candidateWordList end\n");
// Filter 00 over cList (modified in place), in two passes: the first marks
// redundant candidates, the second removes the marked ones.
// cList: list of candidate tags with .str and .wordCount.
// NOTE(review): the assignment of `c` (presumably cList[i]) and the lines
// that set wordCount to 0 / update baseStrTag are not visible here - confirm.
87 filterCandidateWordList00:function(cList){
88 //00: Remove words that are contained in a longer word and whose occurrence count equals that of the longer word
89 //Applies the filter to the candidate words in cList.
90 var iLen = cList.length;
// Start with the first entry as the reference tag; the loop begins at index 1.
94 var baseStrTag = cList[0];
95 for(var i = 1; i < iLen; i++){
97 if(baseStrTag.str.indexOf(c.str) != -1){
98 //c.str is contained in baseStrTag.str
99 if(baseStrTag.wordCount == c.wordCount){
101 //Delete later. Mark it by setting the occurrence count to 0.
106 //The word was not removed, i.e. it is a different word, so update baseStrTag
// Second pass: remove every entry that was marked with wordCount 0.
111 for(var i = 1; i < iLen; i++){
113 if(c.wordCount == 0){
114 cList.removeByIndex(i);
// Filter 01: removes from cList (in place) the candidates whose str is
// shorter than minLen.
// cList: list of candidate tags with .str; minLen: minimum str.length to keep.
// NOTE(review): index / length adjustment after removeByIndex is not visible
// in these lines - confirm.
120 filterCandidateWordList01:function(cList, minLen){
121 //01: Remove candidates whose character count is less than minLen
122 var iLen = cList.length;
123 for(var i = 0; i < iLen; i++){
124 if(cList[i].str.length < minLen){
125 cList.removeByIndex(i);
// Filter 02: removes from cList (in place) the candidates whose wordCount is
// below minCount.
// cList: list of candidate tags with .wordCount; minCount: minimum count to keep.
// NOTE(review): index / length adjustment after removeByIndex is not visible
// in these lines - confirm.
131 filterCandidateWordList02:function(cList, minCount){
132 //02: Remove candidates whose occurrence count is less than minCount
133 var iLen = cList.length;
134 for(var i = 0; i < iLen; i++){
135 if(cList[i].wordCount < minCount){
136 cList.removeByIndex(i);
// Filter 03: removes from cList (in place) the candidates for which
// env.memory.getUUIDFromWord returns something other than
// env.UUID_Meaning_UndefinedString.
// cList: list of candidate tags with .str.
// NOTE(review): index / length adjustment after removeByIndex is not visible
// in these lines - confirm.
142 filterCandidateWordList03: function(cList){
143 //03: Remove candidates that are already known to be words
144 var iLen = cList.length;
145 for(var i = 0; i < iLen; i++){
146 if(this.env.memory.getUUIDFromWord(cList[i].str) != this.env.UUID_Meaning_UndefinedString){
147 cList.removeByIndex(i);
// Sorts env.memory.candidateWordList with the stableSort helper, using the
// difference of wordCount values as the comparator result.
// NOTE(review): ascending order if stableSort follows the usual comparator
// convention - stableSort is defined elsewhere; confirm.
153 sortCandidateWordListByWordCount: function(){
154 this.env.memory.candidateWordList.stableSort(function(a, b){
155 return a.wordCount - b.wordCount;
// Sorts env.memory.candidateWordList with the stableSort helper, using the
// difference of wordLevel values as the comparator result.
// NOTE(review): ascending order if stableSort follows the usual comparator
// convention - stableSort is defined elsewhere; confirm.
158 sortCandidateWordListByWordLevel: function(){
159 this.env.memory.candidateWordList.stableSort(function(a, b){
160 return a.wordLevel - b.wordLevel;
// Sorts env.memory.wordList with the stableSort helper by str length.
// splitByWord calls this before rebuilding its cache.
163 sortWordListByLength: function(){
164 //Make the entries with more characters come first in the list.
165 this.env.memory.wordList.stableSort(function(a, b){
// b minus a: longer strings compare as smaller.
166 return b.str.length - a.str.length;
// Recomputes strTag.wordLevel: resets it to 0, examines each character
// position by script class, runs a fixed five-iteration loop, and finally
// stores the reciprocal of the accumulated value.
// strTag: candidate tag whose wordLevel property is overwritten.
// NOTE(review): `s` and `iLen` are not declared in the visible lines
// (presumably strTag.str and its length), and the per-branch scoring is not
// visible either - confirm against the full file.
169 computeWordLevel: function(strTag){
173 strTag.wordLevel = 0;
175 for(var i = 0; i < iLen; i++){
// Classify the character at position i: hiragana, kanji, katakana or
// half-width kana (helper predicates are defined elsewhere).
176 if(s.isHiraganaAt(i)){
178 } else if(s.isKanjiAt(i)){
180 } else if(s.isKatakanaAt(i)){
182 } else if(s.isHankakuKanaAt(i)){
// Fixed loop of five iterations; its body is not visible here.
188 for(var i = 0; i < 5; i++){
// The stored level is the reciprocal of the accumulated value.
194 strTag.wordLevel = 1 / strTag.wordLevel;
// Calls computeWordLevel on every entry of env.memory.candidateWordList,
// in index order.
197 computeEachWordLevel: function(){
198 var iLen = this.env.memory.candidateWordList.length;
199 for(var i = 0; i < iLen; i++){
200 this.computeWordLevel(this.env.memory.candidateWordList[i]);
// Splits s using the known words as separators, via a cached list of their
// strings.
// s: value providing splitByArraySeparatorSeparatedLong (a helper defined
// elsewhere; presumably a String extension - confirm).
// Returns whatever that helper returns for the cached word list.
203 splitByWord: function(s){
// Rebuild when there is no cache yet, or when the cache timestamp is older
// than env.memory.wordListLastModifiedDate.
204 if(!this.wordListCache || this.wordListCacheLastModifiedDate < this.env.memory.wordListLastModifiedDate){
205 //If the cache does not exist or is stale, sort the original list and then build the cache
206 this.sortWordListByLength();
// The cache holds the "str" property of each wordList entry.
207 this.wordListCache = this.env.memory.wordList.propertiesNamed("str");
208 this.wordListCacheLastModifiedDate = new Date();
210 return s.splitByArraySeparatorSeparatedLong(this.wordListCache);
212 getUUIDListFromSeparatedString: function(separated){
213 var retv = new Array();
214 for(var i = 0, iLen = separated.length; i < iLen; i++){
215 retv.push(this.env.memory.getUUIDFromWord(separated[i]));