OSDN Git Service

初期コミット。単語抽出の実験中。
[chnosproject/AI004.git] / ai.js
1 //AI004
2
3 //
4 // クラス拡張
5 //
6
7 //配列関連
Array.prototype.removeAllObject = function(anObject){
	// Removes, in place, every element that is loosely equal (==) to anObject.
	// Remaining elements shift forward to close the gaps.
	// Returns true when at least one element was removed, false otherwise.
	var removedAny = false;
	var pos = 0;
	while(pos < this.length){
		if(this[pos] == anObject){
			// Stay on the same index: the following element has moved into it.
			this.splice(pos, 1);
			removedAny = true;
		} else{
			pos++;
		}
	}
	return removedAny;
}
Array.prototype.removeByIndex = function(index){
	// Deletes the single element at `index` in place via splice(); the
	// elements after it move forward by one. Nothing is returned.
	this.splice(index, 1);
}
Array.prototype.intersectionWith = function(a, b, fEqualTo){
	// Computes the intersection of this array with another array.
	// Returns a new array containing, in their original order, the elements of
	// the other array that also occur in this array. Neither input is modified.
	//
	// a        : ignored when b is supplied (kept for call compatibility).
	// b        : the other array.
	// fEqualTo : optional predicate fEqualTo(thisElement, otherElement);
	//            when omitted, elements are compared with ==.
	//
	// For convenience the method may also be called as
	// intersectionWith(other) or intersectionWith(other, fEqualTo).
	var other = b;
	var equals = fEqualTo;
	if(b === undefined || b === null || typeof b === "function"){
		other = a;
		if(typeof b === "function"){
			equals = b;
		}
	}
	var result = [];
	for(var i = 0, len = other.length; i < len; i++){
		var candidate = other[i];
		// Track membership with an explicit flag so that falsy elements
		// (0, "", false) are recognised as present.
		var found = false;
		for(var j = 0, jLen = this.length; j < jLen; j++){
			var same = (equals == undefined) ? (this[j] == candidate) : equals(this[j], candidate);
			if(same){
				found = true;
				break;
			}
		}
		if(found){
			result.push(candidate);
		}
	}
	return result;
}
Array.prototype.unionWith = function(a, b, fEqualTo){
	// Computes the union of this array with another array.
	// Returns a new array: a copy of this array followed by those elements of
	// the other array that do not occur in this array. Neither input is modified.
	//
	// a        : ignored when b is supplied (kept for call compatibility).
	// b        : the other array.
	// fEqualTo : optional predicate fEqualTo(thisElement, otherElement);
	//            when omitted, elements are compared with ==.
	//
	// For convenience the method may also be called as
	// unionWith(other) or unionWith(other, fEqualTo).
	var other = b;
	var equals = fEqualTo;
	if(b === undefined || b === null || typeof b === "function"){
		other = a;
		if(typeof b === "function"){
			equals = b;
		}
	}
	var additions = [];
	for(var i = 0, len = other.length; i < len; i++){
		var candidate = other[i];
		// Track membership with an explicit flag so that falsy elements
		// (0, "", false) already present are not appended a second time.
		var found = false;
		for(var j = 0, jLen = this.length; j < jLen; j++){
			var same = (equals == undefined) ? (this[j] == candidate) : equals(this[j], candidate);
			if(same){
				found = true;
				break;
			}
		}
		if(!found){
			additions.push(candidate);
		}
	}
	return this.concat(additions);
}
Array.prototype.isIncluded = function(obj, fEqualTo){
	// Looks for an element matching obj and returns that element (the one
	// stored in the array). Returns false when nothing matches.
	// fEqualTo is optional: a predicate fEqualTo(arrayElement, obj). When it is
	// omitted, elements are compared with ==.
	// Note: a matching element that is itself falsy is returned as-is, so the
	// result cannot be told apart from "not found" by truthiness alone.
	var matches = fEqualTo;
	if(fEqualTo == undefined){
		matches = function(element, target){ return element == target; };
	}
	for(var idx = 0, count = this.length; idx < count; idx++){
		if(matches(this[idx], obj)){
			return this[idx];
		}
	}
	return false;
}
Array.prototype.pushUnique = function(obj, fEqualTo){
	// Appends obj unless an equal element is already stored.
	// fEqualTo is optional: a predicate fEqualTo(arrayElement, obj). When it is
	// omitted, elements are compared with ==.
	// Returns the element that ends up representing obj in the array: the
	// already-stored match if there is one, otherwise obj itself.
	for(var i = 0, len = this.length; i < len; i++){
		var same = (fEqualTo == undefined) ? (this[i] == obj) : fEqualTo(this[i], obj);
		if(same){
			// Decide on the match itself rather than on the truthiness of the
			// stored value, so 0, "" and false are not pushed repeatedly.
			return this[i];
		}
	}
	this.push(obj);
	return obj;
}
Array.prototype.stableSort = function(f){
	// Sorts this array in place, keeping the relative order of elements that
	// the comparator reports as equal (returns 0 for).
	// f is an optional comparator f(a, b); when omitted, elements are ordered
	// numerically ascending (a - b).
	// Nothing is returned.
	//
	// Each element is paired with its original position and the pairs are
	// sorted, so the elements themselves are never written to. This works for
	// primitives, null and frozen objects, and leaves no temporary property
	// behind on the caller's objects.
	var compare = f;
	if(f == undefined){
		compare = function(a, b){ return a - b; };
	}
	var len = this.length;
	var tagged = [];
	for(var i = 0; i < len; i++){
		tagged.push({ value: this[i], index: i });
	}
	tagged.sort(function(x, y){
		var order = compare(x.value, y.value);
		if(order == 0){
			// Tie: fall back to the original position.
			return x.index - y.index;
		}
		return order;
	});
	for(var k = 0; k < len; k++){
		this[k] = tagged[k].value;
	}
};
97
98 //文字列関連
String.prototype.replaceAll = function(org, dest){
	// Replaces every occurrence of org in this string with dest and returns
	// the resulting string. Implemented as split-then-join, so dest is
	// inserted literally.
	// http://www.syboos.jp/webjs/doc/string-replace-and-replaceall.html
	var pieces = this.split(org);
	return pieces.join(dest);
}
String.prototype.compareLeftHand = function (search){
	// Returns the length of the common prefix of this string and search,
	// i.e. how many leading characters of search match this string.
	var matched = 0;
	while(search.charAt(matched) != "" && search.charAt(matched) == this.charAt(matched)){
		matched++;
	}
	return matched;
}
String.prototype.splitByArray = function(separatorList){
	// Splits this string at every separator in separatorList and returns the
	// pieces as an array. Separators are applied one after another, each to
	// the pieces produced so far. A separator stays attached to the end of
	// the piece it terminated; an empty piece left after a trailing separator
	// is dropped.
	var pieces = [this];

	for(var s = 0; s < separatorList.length; s++){
		var separator = separatorList[s];
		var next = [];
		for(var p = 0; p < pieces.length; p++){
			var parts = pieces[p].split(separator);
			var endsWithSeparator = (parts[parts.length - 1] == "");
			if(endsWithSeparator){
				// Drop the empty remainder after the final separator.
				parts.pop();
			}
			// Re-attach the separator to every part that was followed by one:
			// all parts if the piece ended with it, otherwise all but the last.
			var attachCount = endsWithSeparator ? parts.length : parts.length - 1;
			for(var m = 0; m < attachCount; m++){
				parts[m] += separator;
			}
			next = next.concat(parts);
		}
		pieces = next;
	}

	return pieces;
}
String.prototype.trim = function(str){
	// Returns this string with leading and trailing blanks removed, and then
	// one trailing newline removed if the string ends with one.
	// Blanks are the ASCII space, the tab and the full-width (ideographic)
	// space U+3000; they are written as explicit escapes so the set is
	// visible in the source and survives editors and copy/paste.
	// Newlines elsewhere in the string, including leading ones, are kept.
	// The `str` parameter is not used; it is kept so existing calls still match.
	return this.replace(/^[ \t\u3000]+|[ \t\u3000]+$/g, "").replace(/\n$/g, "");
}
144 //http://d.hatena.ne.jp/favril/20090514/1242280476
String.prototype.isKanjiAt = function(index){
	// Returns true when the character at `index` is a kanji (CJK ideograph),
	// false otherwise (including when index is out of range).
	//
	// charCodeAt() yields a single UTF-16 code unit, which can never exceed
	// 0xFFFF, so characters outside the BMP are stored as a surrogate pair.
	// To make the supplementary ranges below reachable, the pair is decoded
	// into its code point first. `index` may point at either half of the pair.
	var u = this.charCodeAt(index);
	if(0xd800 <= u && u <= 0xdbff){
		// High surrogate: combine with the following low surrogate, if any.
		var low = this.charCodeAt(index + 1);
		if(0xdc00 <= low && low <= 0xdfff){
			u = ((u - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
		}
	} else if(0xdc00 <= u && u <= 0xdfff){
		// Low surrogate: combine with the preceding high surrogate, if any.
		var high = this.charCodeAt(index - 1);
		if(0xd800 <= high && high <= 0xdbff){
			u = ((high - 0xd800) << 10) + (u - 0xdc00) + 0x10000;
		}
	}
	if( (0x4e00  <= u && u <= 0x9fcf) ||	// CJK Unified Ideographs
		(0x3400  <= u && u <= 0x4dbf) ||	// CJK Unified Ideographs Extension A
		(0x20000 <= u && u <= 0x2a6df) ||	// CJK Unified Ideographs Extension B
		(0xf900  <= u && u <= 0xfadf) ||	// CJK Compatibility Ideographs
		(0x2f800 <= u && u <= 0x2fa1f)){	// CJK Compatibility Ideographs Supplement
		return true;
	}
	return false;
}
String.prototype.isHiraganaAt = function(index){
	// True when the UTF-16 code unit at `index` lies in 0x3040-0x309F
	// (the hiragana range); false otherwise, including out-of-range indexes.
	var code = this.charCodeAt(index);
	return (code >= 0x3040 && code <= 0x309f);
}
String.prototype.isKatakanaAt = function(index){
	// True when the UTF-16 code unit at `index` lies in 0x30A0-0x30FF
	// (the katakana range); false otherwise, including out-of-range indexes.
	var code = this.charCodeAt(index);
	return (code >= 0x30a0 && code <= 0x30ff);
}
String.prototype.isHankakuKanaAt = function(index){
	// True when the UTF-16 code unit at `index` lies in 0xFF61-0xFF9F
	// (the half-width kana range); false otherwise, including out-of-range
	// indexes.
	var code = this.charCodeAt(index);
	return (code >= 0xff61 && code <= 0xff9f);
}
177
178 //
179 // メインクラス
180 //
181
function AI(){
	// Main class. Owns the input and word-recognition components and the
	// buffered output to two optional DOM boxes (message and debug).

	// Sub-components; each is handed this instance as its environment.
	this.input = new AI_Input(this);
	this.wordRecognition = new AI_WordRecognition(this);
	// Output state
	this.outputTimer = null;	// handle from window.setInterval, or null while stopped
	this.messageBox = null;	// DOM object that receives message() text, or null
	this.messageBoxBuffer = "";	// text waiting to be flushed to messageBox
	this.maxMessageStringLength = 0xffffff;	// truncation threshold for messageBox content
	this.debugBox = null;	// DOM object that receives debug() text, or null
	this.debugBoxBuffer = "";	// text waiting to be flushed to debugBox
	this.maxDebugStringLength = 0xffff;	// truncation threshold for debugBox content
}
AI.prototype = {
	sendToAI: function(str){
		// Feeds str to the input component, echoes every sentence it yields
		// to the message output, then re-sorts, re-scores and dumps the
		// candidate word list to the debug output.
		this.debug("**** Start thinking ****\n");
		this.debug("input:[" + str + "]\n");
		this.input.appendInput(str);
		for(;;){
			var s = this.input.getSentence();
			if(s === undefined){
				break;
			}
			this.message(s + "\n");
		}
		this.wordRecognition.sortCandidateWordListByWordCount();
		this.wordRecognition.computeEachWordLevel();
		this.wordRecognition.sortCandidateWordListByWordLevel();
		this.wordRecognition.debugShowCandidateWordList();
		this.debug("**** End thinking ****\n");
	},
	setMessageBoxDOMObject: function(mBoxObj){
		// Sets (or, with a falsy value, clears) the message output box and
		// starts/stops the flush timer accordingly.
		this.messageBox = mBoxObj;
		this.setOutputTimer();
	},
	setDebugBoxDOMObject: function(dBoxObj){
		// Sets (or, with a falsy value, clears) the debug output box and
		// starts/stops the flush timer accordingly.
		this.debugBox = dBoxObj;
		this.setOutputTimer();
	},
	message: function(str){
		// Queues str, prefixed with "AI> ", for the message box.
		// The text is dropped when no message box is set.
		if(this.messageBox){
			this.messageBoxBuffer += "AI> " + str;
		}
	},
	debug: function(str){
		// Queues str for the debug box. Dropped when no debug box is set.
		if(this.debugBox){
			this.debugBoxBuffer += str;
		}
	},
	escapeHTML: function(str){
		// Escapes the characters that are significant in HTML markup so that
		// plain text can be appended to innerHTML and be shown verbatim.
		return String(str).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
	},
	appendToBox: function(box, text, maxLength){
		// Appends plain text to box.innerHTML. When the result grows beyond
		// maxLength, only the last (maxLength >> 1) characters are kept.
		// Finally scrolls the box to its bottom.
		// The queued text contains echoed user input, so it is escaped before
		// being mixed into markup.
		var str = box.innerHTML + this.escapeHTML(text);
		if(str.length > maxLength){
			str = str.slice(str.length - (maxLength >> 1));
		}
		box.innerHTML = str;
		box.scrollTop = box.scrollHeight;
	},
	outputShowTick: function(){
		// Timer callback: flushes each non-empty buffer into its box.
		if(this.messageBox && this.messageBoxBuffer != ""){
			var pendingMessage = this.messageBoxBuffer;
			this.messageBoxBuffer = "";
			this.appendToBox(this.messageBox, pendingMessage, this.maxMessageStringLength);
		}
		if(this.debugBox && this.debugBoxBuffer != ""){
			var pendingDebug = this.debugBoxBuffer;
			this.debugBoxBuffer = "";
			this.appendToBox(this.debugBox, pendingDebug, this.maxDebugStringLength);
		}
	},
	setOutputTimer: function(){
		if(!this.messageBox && !this.debugBox){
			// No output box is set: stop the flush timer.
			// The handle comes from setInterval, so it is released with
			// clearInterval.
			if(this.outputTimer !== null){
				window.clearInterval(this.outputTimer);
			}
			this.outputTimer = null;
		} else if(!this.outputTimer){
			// At least one box is set and the timer is not running: start it.
			var that = this;
			this.outputTimer = window.setInterval(function(){that.outputShowTick();}, 50);
		}
	},
}
266
267 //
268 //サブクラス
269 //
270
function AI_WordRecognition(env){
	// Word-candidate extraction component. env is the owning object; this
	// class reads env.input.historyList and writes through env.debug().
	this.env = env;
	this.candidateWordList = [];
}
AI_WordRecognition.prototype = {
	slideLookUpCandidateWordByHistory: function(input){
		// For every start position in input, finds the longest prefix of the
		// remaining text that also appears somewhere in a history entry
		// (matches covering the whole remaining text are ignored), collects
		// those prefixes as word tags, filters them and merges them into
		// candidateWordList.
		var history = this.env.input.historyList;
		var found = [];
		for(var start = 0, inputLen = input.length; start < inputLen; start++){
			var tail = input.substr(start);
			var best = 0;
			for(var h = 0, hLen = history.length; h < hLen; h++){
				var entry = history[h];
				for(var offset = 0, entryLen = entry.length; offset < entryLen; offset++){
					var shared = entry.substr(offset).compareLeftHand(tail);
					if(shared != tail.length && shared > best){
						best = shared;
					}
				}
			}
			if(best > 0){
				var tag = found.pushUnique(new AI_WordTag(tail.substr(0, best)));
				tag.wordCount++;
			}
		}
		// Filters
		this.filterCandidateWordList00(found);
		this.filterCandidateWordList01(found, 2);
		// Merge into the global candidate list
		this.mergeCandidateWordList(found);
	},
	appendCandidateWordList: function(strTag){
		// Adds strTag to candidateWordList. When a tag with the same str is
		// already listed, that tag's wordCount is incremented instead;
		// otherwise strTag is stored with wordCount reset to 1.
		var sameStr = function(a, b){ return (a.str == b.str); };
		var known = this.candidateWordList.isIncluded(strTag, sameStr);
		if(!known){
			strTag.wordCount = 1;
			this.candidateWordList.push(strTag);
			return;
		}
		known.wordCount++;
	},
	mergeCandidateWordList: function(strTagList){
		// Appends every tag of strTagList through appendCandidateWordList().
		var total = strTagList.length;
		for(var n = 0; n < total; n++){
			this.appendCandidateWordList(strTagList[n]);
		}
	},
	debugShowCandidateWordList: function(){
		// Dumps "count :level :str" for every candidate to the debug output.
		var list = this.candidateWordList;
		this.env.debug("candidateWordList:\n");
		for(var n = 0, total = list.length; n < total; n++){
			var tag = list[n];
			this.env.debug(tag.wordCount.toString() + " :" + tag.wordLevel.toString() + " :" + tag.str + "\n");
		}
		this.env.debug("candidateWordList end\n");
	},
	filterCandidateWordList00:function(cList){
		// Filter 00: removes a candidate when its str is contained in the
		// current reference candidate's str and both have the same wordCount.
		// The reference starts as cList[0] and moves to each candidate that
		// survives. cList is modified in place.
		var total = cList.length;
		if(total < 1){
			return;
		}
		var reference = cList[0];
		for(var i = 1; i < total; i++){
			var current = cList[i];
			var contained = (reference.str.indexOf(current.str) != -1);
			if(contained && reference.wordCount == current.wordCount){
				// Redundant: mark for removal by zeroing the count.
				current.wordCount = 0;
			}
			if(current.wordCount > 0){
				// Survived, so it is a distinct word: use it as the new reference.
				reference = current;
			}
		}
		// Sweep the marked entries (index 0 is never examined).
		var k = 1;
		while(k < total){
			if(cList[k].wordCount == 0){
				cList.removeByIndex(k);
				total--;
			} else{
				k++;
			}
		}
	},
	filterCandidateWordList01:function(cList, minLen){
		// Filter 01: removes, in place, candidates whose str is shorter than minLen.
		var total = cList.length;
		var k = 0;
		while(k < total){
			if(cList[k].str.length < minLen){
				cList.removeByIndex(k);
				total--;
			} else{
				k++;
			}
		}
	},
	filterCandidateWordList02:function(cList, minCount){
		// Filter 02: removes, in place, candidates whose wordCount is below minCount.
		var total = cList.length;
		var k = 0;
		while(k < total){
			if(cList[k].wordCount < minCount){
				cList.removeByIndex(k);
				total--;
			} else{
				k++;
			}
		}
	},
	sortCandidateWordListByWordCount: function(){
		// Stable sort of candidateWordList, ascending by wordCount.
		var byCount = function(a, b){
			return a.wordCount - b.wordCount;
		};
		this.candidateWordList.stableSort(byCount);
	},
	sortCandidateWordListByWordLevel: function(){
		// Stable sort of candidateWordList, ascending by wordLevel.
		var byLevel = function(a, b){
			return a.wordLevel - b.wordLevel;
		};
		this.candidateWordList.stableSort(byLevel);
	},
	computeWordLevel: function(strTag){
		// Sets strTag.wordLevel to 1 / (number of distinct character classes
		// found in strTag.str). The classes are hiragana, kanji, katakana,
		// half-width kana and "anything else", one bit each.
		var text = strTag.str;
		var classBits = 0;
		strTag.wordLevel = 0;
		for(var i = 0, textLen = text.length; i < textLen; i++){
			var bit;
			if(text.isHiraganaAt(i)){
				bit = 0x01;
			} else if(text.isKanjiAt(i)){
				bit = 0x02;
			} else if(text.isKatakanaAt(i)){
				bit = 0x04;
			} else if(text.isHankakuKanaAt(i)){
				bit = 0x08;
			} else{
				bit = 0x10;
			}
			classBits |= bit;
		}
		// Count the set bits among the five class flags.
		var distinct = 0;
		for(var shift = 0; shift < 5; shift++){
			if(((classBits >>> shift) & 0x01) != 0){
				distinct++;
			}
		}
		strTag.wordLevel = 1 / distinct;
	},
	computeEachWordLevel: function(){
		// Recomputes wordLevel for every entry of candidateWordList.
		var list = this.candidateWordList;
		for(var n = 0, total = list.length; n < total; n++){
			this.computeWordLevel(list[n]);
		}
	}
}
430
function AI_WordTag(str){
	// One candidate word.
	//   str       : the word text
	//   wordCount : occurrence counter, starts at 0
	//   wordLevel : score, starts at 0
	this.str = str;
	this.wordCount = 0;
	this.wordLevel = 0;
}
436
function AI_Input(env){
	// Input component: splits incoming text into sentences and keeps a
	// history of the sentences already handed out.
	// env is the owning object; env.wordRecognition is used by getSentence().
	this.env = env;
	this.historyList = [];	// sentences already returned by getSentence(), oldest first
	this.sentenceList = [];	// sentences waiting to be returned by getSentence()
}
AI_Input.prototype = {
	// Once historyList grows beyond this, its oldest half is discarded.
	maxHistoryLength: 32,
	// Strings that terminate a sentence. The full-width marks are written as
	// escapes so they cannot be confused with (or silently normalized to)
	// their ASCII look-alikes.
	sentenceSeparator: [
		"。",
		"\uff01",	// full-width exclamation mark
		"\uff1f",	// full-width question mark
		"!",
		"?",
		"\n",
	],
	appendInput: function(str){
		// Splits str at the sentence separators and queues the pieces.
		var sList = str.splitByArray(this.sentenceSeparator);

		this.sentenceList = this.sentenceList.concat(sList);
	},
	getSentence: function(){
		// Returns the next queued sentence, trimmed, or undefined when the
		// queue is empty. Sentences that are empty after trimming (e.g. a
		// bare newline) are discarded.
		var retv;
		for(;;){
			if(this.sentenceList.length <= 0){
				return undefined;
			}
			retv = this.sentenceList.shift().trim();
			if(retv != ""){
				break;
			}
		}
		// Extract word candidates against the history *before* the sentence
		// itself becomes part of that history.
		this.env.wordRecognition.slideLookUpCandidateWordByHistory(retv);
		this.appendHistory(retv);
		return retv;
	},
	appendHistory: function(str){
		// Records str in the history; when the history exceeds
		// maxHistoryLength, drops its oldest (maxHistoryLength >> 1) entries.
		this.historyList.push(str);
		if(this.historyList.length > this.maxHistoryLength){
			this.historyList.splice(0, this.maxHistoryLength >> 1);
		}
	},
}