OSDN Git Service

ELCCIDE(elcc/elccide.html)を追加。ブラウザ上でソースコードを編集し、即座に実行することができます。
[chnosproject/AI004.git] / aiwrcgnz.js
/**
 * Word recognition helper: extracts candidate words from the input history,
 * maintains the candidate word list held in env.memory, and splits strings
 * by the known word list.
 *
 * This class calls several Array/String prototype extensions that are not
 * defined in this block (pushUnique, isIncluded, removeByIndex, stableSort,
 * copy, propertiesNamed, compareLeftHand, isHiraganaAt, isKanjiAt,
 * isKatakanaAt, isHankakuKanaAt, splitByArraySeparatorSeparatedLong) as well
 * as the AI_CandidateWordTag constructor.
 *
 * @param {Object} env - Shared environment object. This class reads
 *     env.input.historyList, env.memory, env.debug and
 *     env.UUID_Meaning_UndefinedString.
 */
function AI_WordRecognition(env){
    this.env = env;
    // Cache of the "str" properties of env.memory.wordList; built lazily by splitByWord().
    this.wordListCache = null;
    this.wordListCacheLastModifiedDate = new Date();
}
AI_WordRecognition.prototype = {
    /**
     * Slides over every suffix of `input`, finds the longest prefix of that
     * suffix that also appears somewhere in the input history, and registers
     * the surviving prefixes as candidate words.
     *
     * @param {string} input - Newly entered text.
     */
    slideLookUpCandidateWordByHistory: function(input){
        var h = this.env.input.historyList;
        var hLen = h.length;
        var cList = [];
        for(var i = 0, iLen = input.length; i < iLen; i++){
            // input character loop: iStr is the suffix of input starting at i
            var iStr = input.slice(i);
            var cLen = 0;
            for(var j = 0; j < hLen; j++){
                // history entry loop
                var hStrBase = h[j];
                for(var k = 0, kLen = hStrBase.length; k < kLen; k++){
                    // history character loop: compare each history suffix with iStr
                    var m = hStrBase.slice(k).compareLeftHand(iStr);
                    // A match that consumes the whole remaining input is not accepted.
                    if(m > cLen && m != iStr.length){
                        cLen = m;
                    }
                }
            }
            if(cLen > 0){
                // pushUnique returns the tag stored in the list; count one occurrence on it.
                cList.pushUnique(new AI_CandidateWordTag(iStr.slice(0, cLen).trim())).wordCount++;
            }
        }
        // Filter the local candidates
        this.filterCandidateWordList00(cList);
        this.filterCandidateWordList01(cList, 2);
        this.filterCandidateWordList03(cList);
        // Merge the survivors into the stored candidate list
        this.mergeCandidateWordList(cList);
    },
    /**
     * Adds one candidate tag to env.memory.candidateWordList. If a tag with
     * the same `str` already exists its wordCount is incremented; otherwise
     * the given tag is stored with wordCount reset to 1.
     *
     * @param {Object} strTag - Candidate tag with a `str` property.
     */
    appendCandidateWordList: function(strTag){
        var s = this.env.memory.candidateWordList.isIncluded(strTag, function(a, b){ return (a.str == b.str); });
        if(s){
            s.wordCount++;
        } else{
            strTag.wordCount = 1;
            this.env.memory.appendMemoryTag(strTag);
        }
    },
    /**
     * Looks up a stored candidate tag by its string.
     *
     * @param {string} str - String to search for.
     * @returns {*} Whatever isIncluded returns for a tag whose `str` equals `str`.
     */
    getCandidateWordTagByString: function(str){
        return this.env.memory.candidateWordList.isIncluded(str, function(a, b){ return (a.str == b); });
    },
    /**
     * Appends every tag of `strTagList` through appendCandidateWordList.
     *
     * @param {Array} strTagList - Candidate tags to merge.
     */
    mergeCandidateWordList: function(strTagList){
        for(var i = 0, iLen = strTagList.length; i < iLen; i++){
            this.appendCandidateWordList(strTagList[i]);
        }
    },
    /**
     * Removes unneeded candidate words from env.memory.candidateWordList:
     * candidates whose wordCount is below 10, and candidates whose wordLevel
     * is below 1 (provisional rule). Records the cleaning time in
     * env.memory.candidateWordListLastCleanedDate.
     */
    cleanCandidateWordList: function(){
        var memory = this.env.memory;
        var iLen = memory.candidateWordList.length;
        for(var i = 0; i < iLen; i++){
            var tag = memory.candidateWordList[i];
            // Decide on a single removal reason per entry. Testing the second
            // condition after a removal would read index i - 1 (possibly -1).
            var reason = null;
            if(tag.wordCount < 10){
                reason = "wordCount";
            } else if(tag.wordLevel < 1){
                reason = "wordLevel";
            }
            if(reason !== null){
                this.env.debug("Too small " + reason + " of candidateWord [" + tag.str + "]. Removed.\n");
                memory.removeMemoryTagByObject(tag);
                // The list shrank by one: revisit the same index next iteration.
                i--;
                iLen--;
            }
        }
        memory.candidateWordListLastCleanedDate = new Date();
    },
    /**
     * Writes the candidate word list to env.debug, iterating over a reversed
     * copy so the stored list itself is left in its current order.
     */
    debugShowCandidateWordList: function(){
        var c = this.env.memory.candidateWordList.copy();
        c.reverse();
        this.env.debug("candidateWordList:" + c.length + "\n #:wCount:level:str\n");

        for(var i = 0, iLen = c.length; i < iLen; i++){
            this.env.debug((i + 1) + ":\t" + c[i].wordCount.toString() + ":\t" + c[i].wordLevel.toString() + ":\t" + c[i].str + "\n");
        }
        this.env.debug("candidateWordList end\n");
    },
    /**
     * Filter 00: removes candidates that are contained in a longer candidate
     * and have the same occurrence count as that longer candidate.
     * Operates in place on `cList`; the first element is never removed.
     *
     * @param {Array} cList - Candidate tags (modified in place).
     */
    filterCandidateWordList00: function(cList){
        var iLen = cList.length;
        if(iLen < 1){
            return;
        }
        var baseStrTag = cList[0];
        var i;
        var c;
        for(i = 1; i < iLen; i++){
            c = cList[i];
            if(baseStrTag.str.indexOf(c.str) !== -1 && baseStrTag.wordCount == c.wordCount){
                // c.str is contained in baseStrTag.str and occurs equally often:
                // it is redundant. Mark it for removal by zeroing its count.
                c.wordCount = 0;
            }
            if(c.wordCount > 0){
                // Not marked, so it is a distinct word: it becomes the new base.
                baseStrTag = c;
            }
        }
        // Removal pass
        for(i = 1; i < iLen; i++){
            if(cList[i].wordCount == 0){
                cList.removeByIndex(i);
                i--;
                iLen--;
            }
        }
    },
    /**
     * Filter 01: removes candidates whose string is shorter than `minLen`.
     *
     * @param {Array} cList - Candidate tags (modified in place).
     * @param {number} minLen - Minimum string length to keep.
     */
    filterCandidateWordList01: function(cList, minLen){
        var iLen = cList.length;
        for(var i = 0; i < iLen; i++){
            if(cList[i].str.length < minLen){
                cList.removeByIndex(i);
                i--;
                iLen--;
            }
        }
    },
    /**
     * Filter 02: removes candidates whose wordCount is below `minCount`.
     *
     * @param {Array} cList - Candidate tags (modified in place).
     * @param {number} minCount - Minimum occurrence count to keep.
     */
    filterCandidateWordList02: function(cList, minCount){
        var iLen = cList.length;
        for(var i = 0; i < iLen; i++){
            if(cList[i].wordCount < minCount){
                cList.removeByIndex(i);
                i--;
                iLen--;
            }
        }
    },
    /**
     * Filter 03: removes candidates already known as words, i.e. those for
     * which env.memory.getUUIDFromWord returns something other than
     * env.UUID_Meaning_UndefinedString.
     *
     * @param {Array} cList - Candidate tags (modified in place).
     */
    filterCandidateWordList03: function(cList){
        var iLen = cList.length;
        for(var i = 0; i < iLen; i++){
            if(this.env.memory.getUUIDFromWord(cList[i].str) != this.env.UUID_Meaning_UndefinedString){
                cList.removeByIndex(i);
                i--;
                iLen--;
            }
        }
    },
    /** Sorts env.memory.candidateWordList with the comparator a.wordCount - b.wordCount. */
    sortCandidateWordListByWordCount: function(){
        this.env.memory.candidateWordList.stableSort(function(a, b){
            return a.wordCount - b.wordCount;
        });
    },
    /** Sorts env.memory.candidateWordList with the comparator a.wordLevel - b.wordLevel. */
    sortCandidateWordListByWordLevel: function(){
        this.env.memory.candidateWordList.stableSort(function(a, b){
            return a.wordLevel - b.wordLevel;
        });
    },
    /** Sorts env.memory.wordList so that longer strings come first. */
    sortWordListByLength: function(){
        this.env.memory.wordList.stableSort(function(a, b){
            return b.str.length - a.str.length;
        });
    },
    /**
     * Computes strTag.wordLevel as 1 / (number of distinct character kinds in
     * strTag.str). The kinds are hiragana, kanji, katakana, half-width kana
     * and "anything else". An empty string gets wordLevel 0 instead of the
     * Infinity that 1 / 0 would produce.
     *
     * @param {Object} strTag - Tag with a `str` property; `wordLevel` is written.
     */
    computeWordLevel: function(strTag){
        var s = strTag.str;
        var f = 0;
        // Collect one flag bit per character kind present in the string.
        for(var i = 0, iLen = s.length; i < iLen; i++){
            if(s.isHiraganaAt(i)){
                f |= 0x01;
            } else if(s.isKanjiAt(i)){
                f |= 0x02;
            } else if(s.isKatakanaAt(i)){
                f |= 0x04;
            } else if(s.isHankakuKanaAt(i)){
                f |= 0x08;
            } else{
                f |= 0x10;
            }
        }
        // Count the set bits (at most five).
        var kinds = 0;
        for(; f !== 0; f >>>= 1){
            if((f & 0x01) !== 0){
                kinds++;
            }
        }
        strTag.wordLevel = (kinds === 0) ? 0 : 1 / kinds;
        return;
    },
    /** Runs computeWordLevel on every tag of env.memory.candidateWordList. */
    computeEachWordLevel: function(){
        var list = this.env.memory.candidateWordList;
        for(var i = 0, iLen = list.length; i < iLen; i++){
            this.computeWordLevel(list[i]);
        }
    },
    /**
     * Splits `s` using the known word strings as separators.
     *
     * @param {string} s - String to split.
     * @returns {*} Result of s.splitByArraySeparatorSeparatedLong(wordListCache).
     */
    splitByWord: function(s){
        if(!this.wordListCache || this.wordListCacheLastModifiedDate < this.env.memory.wordListLastModifiedDate){
            // No cache yet, or the cache is older than the word list:
            // sort the source list first, then rebuild the cache from it.
            this.sortWordListByLength();
            this.wordListCache = this.env.memory.wordList.propertiesNamed("str");
            this.wordListCacheLastModifiedDate = new Date();
        }
        return s.splitByArraySeparatorSeparatedLong(this.wordListCache);
    },
    /**
     * Maps each element of `separated` through env.memory.getUUIDFromWord.
     *
     * @param {Array} separated - Strings to look up.
     * @returns {Array} Lookup results, in the same order as the input.
     */
    getUUIDListFromSeparatedString: function(separated){
        var retv = [];
        for(var i = 0, iLen = separated.length; i < iLen; i++){
            retv.push(this.env.memory.getUUIDFromWord(separated[i]));
        }
        return retv;
    }
};