OSDN Git Service

phpを介してインターネットからページを読み込む機能を追加。
[chnosproject/AI004.git] / aiwrcgnz.js
/**
 * Candidate-word recognizer.
 *
 * Extracts candidate words from input text by comparing it with the input
 * history, and maintains the candidate word list held in the environment's
 * memory (counting, filtering, sorting, levelling).
 *
 * @constructor
 * @param {Object} env - Shared environment. The methods below read
 *     env.input.historyList, env.memory.candidateWordList,
 *     env.memory.appendMemoryTag() and env.debug().
 */
function AI_WordRecognition(env){
	this.env = env;
}
AI_WordRecognition.prototype = {
	/**
	 * Slides over every suffix of `input`, looks for the longest run shared
	 * with any suffix of any history entry, and registers each such run as a
	 * candidate word. The collected candidates are filtered (filter 00, then
	 * filter 01 with a minimum length of 2) and merged into the memory.
	 *
	 * NOTE(review): compareLeftHand() and pushUnique() are project helpers
	 * defined outside this file; compareLeftHand() presumably returns the
	 * number of leading characters two strings share, and pushUnique()
	 * presumably returns the tag that ends up stored in the list - confirm.
	 *
	 * @param {string} input - Text to scan for candidate words.
	 */
	slideLookUpCandidateWordByHistory: function(input){
		var h = this.env.input.historyList;
		var cList = [];
		for(var i = 0, iLen = input.length; i < iLen; i++){
			// input character loop: iStr is the rest of the input from position i
			var iStr = input.slice(i);
			// longest match length found so far for this input position
			var cLen = 0;
			for(var j = 0, jLen = h.length; j < jLen; j++){
				// history entry loop
				var hStrBase = h[j];
				for(var k = 0, kLen = hStrBase.length; k < kLen; k++){
					// history character loop: compare each history suffix with iStr
					var m = hStrBase.slice(k).compareLeftHand(iStr);
					// a match covering the whole remaining input is not accepted
					if(m > cLen && m != iStr.length){
						cLen = m;
					}
				}
			}
			if(cLen > 0){
				cList.pushUnique(new AI_CandidateWordTag(iStr.slice(0, cLen).trim())).wordCount++;
			}
		}
		// filter
		this.filterCandidateWordList00(cList);
		this.filterCandidateWordList01(cList, 2);
		// append to memory
		this.mergeCandidateWordList(cList);
	},
	/**
	 * Adds one candidate tag to the memory. If a tag with an equal `str`
	 * already exists in env.memory.candidateWordList its wordCount is
	 * incremented; otherwise the new tag gets wordCount 1 and is appended
	 * through env.memory.appendMemoryTag().
	 *
	 * @param {Object} strTag - Candidate word tag (has a `str` property).
	 */
	appendCandidateWordList: function(strTag){
		var s = this.env.memory.candidateWordList.isIncluded(strTag, function(a, b){ return (a.str == b.str); });
		if(s){
			s.wordCount++;
		} else{
			strTag.wordCount = 1;
			this.env.memory.appendMemoryTag(strTag);
		}
	},
	/**
	 * Appends every tag of `strTagList` via appendCandidateWordList().
	 *
	 * @param {Array} strTagList - Candidate word tags to merge.
	 */
	mergeCandidateWordList: function(strTagList){
		for(var i = 0, iLen = strTagList.length; i < iLen; i++){
			this.appendCandidateWordList(strTagList[i]);
		}
	},
	/**
	 * Dumps the candidate word list, in reverse order, through env.debug().
	 * Each line shows index, wordCount, wordLevel and the word itself.
	 */
	debugShowCandidateWordList: function(){
		// work on a reversed copy so the stored list keeps its order
		var c = [].concat(this.env.memory.candidateWordList);
		c.reverse();
		this.env.debug("candidateWordList:" + c.length + "\n");

		for(var i = 0, iLen = c.length; i < iLen; i++){
			// String() instead of .toString(): a tag whose wordLevel has not been
			// computed yet must not make the debug dump throw.
			this.env.debug((i + 1) + ":\t" + String(c[i].wordCount) + ":\t" + String(c[i].wordLevel) + ":\t" + c[i].str + "\n");
		}
		this.env.debug("candidateWordList end\n");
	},
	/**
	 * Filter 00: removes, in place, every word that is contained in the
	 * current base word and has the same occurrence count as that base word.
	 * The base word starts as cList[0] and moves to each word that survives.
	 *
	 * @param {Array} cList - Candidate word tags; modified in place.
	 */
	filterCandidateWordList00:function(cList){
		var iLen = cList.length;
		if(iLen < 1){
			return;
		}
		var baseStrTag = cList[0];
		var i, c;
		for(i = 1; i < iLen; i++){
			c = cList[i];
			if(baseStrTag.str.indexOf(c.str) !== -1){
				// c.str is contained in baseStrTag.str
				if(baseStrTag.wordCount == c.wordCount){
					// ...and occurs equally often, so it is redundant.
					// Mark it for deletion by zeroing its count.
					c.wordCount = 0;
				}
			}
			if(c.wordCount > 0){
				// not marked, i.e. a different word: it becomes the new base
				baseStrTag = c;
			}
		}
		// deletion pass (cList[0] is never marked by the loop above)
		for(i = 1; i < iLen; i++){
			c = cList[i];
			if(c.wordCount == 0){
				cList.removeByIndex(i);
				// stay on the same index; the list shrank by one
				i--;
				iLen--;
			}
		}
	},
	/**
	 * Filter 01: removes, in place, candidates whose `str` is shorter than
	 * `minLen` characters.
	 *
	 * @param {Array} cList - Candidate word tags; modified in place.
	 * @param {number} minLen - Minimum accepted string length.
	 */
	filterCandidateWordList01:function(cList, minLen){
		var iLen = cList.length;
		for(var i = 0; i < iLen; i++){
			if(cList[i].str.length < minLen){
				cList.removeByIndex(i);
				i--;
				iLen--;
			}
		}
	},
	/**
	 * Filter 02: removes, in place, candidates whose wordCount is below
	 * `minCount`.
	 *
	 * @param {Array} cList - Candidate word tags; modified in place.
	 * @param {number} minCount - Minimum accepted occurrence count.
	 */
	filterCandidateWordList02:function(cList, minCount){
		var iLen = cList.length;
		for(var i = 0; i < iLen; i++){
			if(cList[i].wordCount < minCount){
				cList.removeByIndex(i);
				i--;
				iLen--;
			}
		}
	},
	/**
	 * Sorts env.memory.candidateWordList with the list's stableSort() helper,
	 * comparing tags by wordCount (a.wordCount - b.wordCount).
	 */
	sortCandidateWordListByWordCount: function(){
		this.env.memory.candidateWordList.stableSort(function(a, b){
			return a.wordCount - b.wordCount;
		});
	},
	/**
	 * Sorts env.memory.candidateWordList with the list's stableSort() helper,
	 * comparing tags by wordLevel (a.wordLevel - b.wordLevel).
	 */
	sortCandidateWordListByWordLevel: function(){
		this.env.memory.candidateWordList.stableSort(function(a, b){
			return a.wordLevel - b.wordLevel;
		});
	},
	/**
	 * Computes strTag.wordLevel as 1 / (number of distinct character kinds in
	 * strTag.str). The kinds are hiragana, kanji, katakana, half-width kana
	 * and "anything else". An empty string has no kinds and gets level 0
	 * (instead of dividing by zero, which produced Infinity and NaN results
	 * in the wordLevel sort comparator).
	 *
	 * @param {Object} strTag - Candidate word tag; its wordLevel is overwritten.
	 */
	computeWordLevel: function(strTag){
		var s = strTag.str;
		var iLen = s.length;
		// bit set: one bit per character kind seen in the string
		var f = 0;
		for(var i = 0; i < iLen; i++){
			if(s.isHiraganaAt(i)){
				f |= 0x01;
			} else if(s.isKanjiAt(i)){
				f |= 0x02;
			} else if(s.isKatakanaAt(i)){
				f |= 0x04;
			} else if(s.isHankakuKanaAt(i)){
				f |= 0x08;
			} else{
				f |= 0x10;
			}
		}
		// count how many of the five kind bits are set
		var kinds = 0;
		for(var b = 0; b < 5; b++){
			if((f & 0x01) !== 0){
				kinds++;
			}
			f >>>= 1;
		}
		strTag.wordLevel = (kinds > 0) ? (1 / kinds) : 0;
	},
	/**
	 * Runs computeWordLevel() on every tag in env.memory.candidateWordList.
	 */
	computeEachWordLevel: function(){
		var iLen = this.env.memory.candidateWordList.length;
		for(var i = 0; i < iLen; i++){
			this.computeWordLevel(this.env.memory.candidateWordList[i]);
		}
	}
}