OSDN Git Service

コードファイルの分割
[chnosproject/AI004.git] / aiwrcgnz.js
/**
 * Candidate-word recognition: extracts candidate words from input text by
 * comparing it with previously seen input, then filters, counts, scores and
 * sorts those candidates.
 *
 * @param {Object} env - Shared environment object. The methods below read
 *   env.input.historyList, env.memory.candidateWordList,
 *   env.memory.appendMemoryTag() and env.debug().
 */
function AI_WordRecognition(env){
	this.env = env;
}
AI_WordRecognition.prototype = {
	/**
	 * Slides over every start position of `input`, and for each suffix looks
	 * for the longest match against any suffix of any history entry. Matches
	 * become candidate words, which are filtered and merged into memory.
	 *
	 * NOTE(review): relies on String#compareLeftHand (defined elsewhere);
	 * presumably it returns the number of leading characters both strings
	 * share - confirm against its definition.
	 * NOTE(review): relies on Array#pushUnique (defined elsewhere) returning
	 * the stored tag object - confirm.
	 *
	 * @param {string} input - Text to scan for candidate words.
	 */
	slideLookUpCandidateWordByHistory: function(input){
		var history = this.env.input.historyList;
		var cList = [];
		for(var i = 0, iLen = input.length; i < iLen; i++){
			// Input character loop: iStr is the input from position i onward.
			var iStr = input.slice(i);
			var cLen = 0;
			for(var j = 0, jLen = history.length; j < jLen; j++){
				// History entry loop.
				var hStrBase = history[j];
				for(var k = 0, kLen = hStrBase.length; k < kLen; k++){
					// History character loop: try every suffix of the entry.
					var m = hStrBase.slice(k).compareLeftHand(iStr);
					// A match covering the whole remaining input is skipped;
					// only strictly shorter matches are kept.
					if(m > cLen && m !== iStr.length){
						cLen = m;
					}
				}
			}
			if(cLen > 0){
				cList.pushUnique(new AI_CandidateWordTag(iStr.slice(0, cLen))).wordCount++;
			}
		}
		// Filter.
		this.filterCandidateWordList00(cList);
		this.filterCandidateWordList01(cList, 2);
		// Add the surviving candidates to memory.
		this.mergeCandidateWordList(cList);
	},
	/**
	 * Registers one candidate tag in memory. If a tag with the same `str`
	 * is already in env.memory.candidateWordList its wordCount is
	 * incremented; otherwise `strTag` gets wordCount 1 and is passed to
	 * env.memory.appendMemoryTag().
	 *
	 * @param {Object} strTag - Candidate tag; its `str` is compared and its
	 *   `wordCount` is overwritten when it is newly added.
	 */
	appendCandidateWordList: function(strTag){
		var known = this.env.memory.candidateWordList.isIncluded(strTag, function(a, b){
			return a.str === b.str;
		});
		if(known){
			known.wordCount++;
		} else{
			strTag.wordCount = 1;
			this.env.memory.appendMemoryTag(strTag);
		}
	},
	/**
	 * Registers every tag of `strTagList` via appendCandidateWordList().
	 *
	 * @param {Array} strTagList - Candidate tags to merge into memory.
	 */
	mergeCandidateWordList: function(strTagList){
		for(var i = 0, iLen = strTagList.length; i < iLen; i++){
			this.appendCandidateWordList(strTagList[i]);
		}
	},
	/**
	 * Writes every entry of env.memory.candidateWordList to env.debug() as
	 * "<wordCount> :<wordLevel> :<str>".
	 */
	debugShowCandidateWordList: function(){
		this.env.debug("candidateWordList:\n");
		var c = this.env.memory.candidateWordList;
		for(var i = 0, iLen = c.length; i < iLen; i++){
			var tag = c[i];
			// String() instead of .toString(): a field that has not been
			// assigned yet is printed as "undefined" rather than throwing.
			this.env.debug(String(tag.wordCount) + " :" + String(tag.wordLevel) + " :" + tag.str + "\n");
		}
		this.env.debug("candidateWordList end\n");
	},
	/**
	 * Filter 00: removes words that are contained in a longer word and whose
	 * occurrence count equals that of the longer word. Works in place on
	 * `cList`; the first element is always kept.
	 *
	 * @param {Array} cList - Candidate tags (objects with `str` and
	 *   `wordCount`); modified in place.
	 */
	filterCandidateWordList00: function(cList){
		var iLen = cList.length;
		if(iLen < 1){
			return;
		}
		var i;
		var baseStrTag = cList[0];
		for(i = 1; i < iLen; i++){
			var c = cList[i];
			// c.str is contained in baseStrTag.str and occurs equally often,
			// so it is redundant. Mark it with count 0; it is deleted below.
			if(baseStrTag.str.indexOf(c.str) !== -1 && baseStrTag.wordCount === c.wordCount){
				c.wordCount = 0;
			}
			if(c.wordCount > 0){
				// The word was not marked, i.e. it is a different word, so it
				// becomes the new base for the following comparisons.
				baseStrTag = c;
			}
		}
		// Deletion pass. Walking backwards keeps the remaining indices valid
		// after each removal. Index 0 is never examined.
		for(i = iLen - 1; i >= 1; i--){
			if(cList[i].wordCount === 0){
				cList.removeByIndex(i);
			}
		}
	},
	/**
	 * Filter 01: removes candidates whose string is shorter than `minLen`
	 * characters. Works in place on `cList`.
	 *
	 * @param {Array} cList - Candidate tags; modified in place.
	 * @param {number} minLen - Minimum `str.length` a candidate must have.
	 */
	filterCandidateWordList01: function(cList, minLen){
		// Backwards so removals do not shift the indices still to be visited.
		for(var i = cList.length - 1; i >= 0; i--){
			if(cList[i].str.length < minLen){
				cList.removeByIndex(i);
			}
		}
	},
	/**
	 * Filter 02: removes candidates whose occurrence count is below
	 * `minCount`. Works in place on `cList`.
	 *
	 * @param {Array} cList - Candidate tags; modified in place.
	 * @param {number} minCount - Minimum `wordCount` a candidate must have.
	 */
	filterCandidateWordList02: function(cList, minCount){
		// Backwards so removals do not shift the indices still to be visited.
		for(var i = cList.length - 1; i >= 0; i--){
			if(cList[i].wordCount < minCount){
				cList.removeByIndex(i);
			}
		}
	},
	/**
	 * Sorts env.memory.candidateWordList by ascending wordCount using
	 * Array#stableSort (defined elsewhere).
	 */
	sortCandidateWordListByWordCount: function(){
		this.env.memory.candidateWordList.stableSort(function(a, b){
			return a.wordCount - b.wordCount;
		});
	},
	/**
	 * Sorts env.memory.candidateWordList by ascending wordLevel using
	 * Array#stableSort (defined elsewhere).
	 */
	sortCandidateWordListByWordLevel: function(){
		this.env.memory.candidateWordList.stableSort(function(a, b){
			return a.wordLevel - b.wordLevel;
		});
	},
	/**
	 * Sets strTag.wordLevel to 1 / (number of distinct character kinds in
	 * strTag.str). The five kinds are: hiragana, kanji, katakana, half-width
	 * kana, and everything else. An empty string has no kinds and gets
	 * wordLevel 0 (instead of dividing by zero).
	 *
	 * @param {Object} strTag - Tag whose `str` is inspected and whose
	 *   `wordLevel` is written.
	 */
	computeWordLevel: function(strTag){
		var s = strTag.str;
		var kindFlags = 0;
		// Collect one bit per character kind present in the string.
		for(var i = 0, iLen = s.length; i < iLen; i++){
			if(s.isHiraganaAt(i)){
				kindFlags |= 0x01;
			} else if(s.isKanjiAt(i)){
				kindFlags |= 0x02;
			} else if(s.isKatakanaAt(i)){
				kindFlags |= 0x04;
			} else if(s.isHankakuKanaAt(i)){
				kindFlags |= 0x08;
			} else{
				kindFlags |= 0x10;
			}
		}
		// Count the set bits, i.e. the number of distinct kinds.
		var kindCount = 0;
		for(; kindFlags !== 0; kindFlags >>>= 1){
			kindCount += kindFlags & 0x01;
		}
		// Guard the empty string: 1 / 0 would give Infinity, and subtracting
		// two Infinity levels in the wordLevel sort comparator yields NaN.
		strTag.wordLevel = (kindCount === 0) ? 0 : 1 / kindCount;
	},
	/**
	 * Runs computeWordLevel() on every entry of
	 * env.memory.candidateWordList.
	 */
	computeEachWordLevel: function(){
		var list = this.env.memory.candidateWordList;
		for(var i = 0, iLen = list.length; i < iLen; i++){
			this.computeWordLevel(list[i]);
		}
	}
};