OSDN Git Service

8291c82796dcf8ee701757cc8ea6660261497a93
[gikonavigoeson/gikonavi.git] / GikoBayesian.pas
1 unit GikoBayesian;
2
3 {!
4 \file           GikoBayesian.pas
5 \brief  \83x\83C\83W\83A\83\93\83t\83B\83\8b\83^
6
7 $Id: GikoBayesian.pas,v 1.2 2004/10/21 01:20:34 yoffy Exp $
8 }
9
10 interface
11
12 //==================================================
13 uses
14 //==================================================
15         Classes, IniFiles;
16
17 //==================================================
18 type
19 //==================================================
20
21         {!***********************************************************
22         \brief \92P\8cê\83v\83\8d\83p\83e\83B
23         ************************************************************}
24         TWordInfo       = class( TObject )
25         private
26                 FNormalWord                     :       Integer;        //!< \92Ê\8fí\82Ì\92P\8cê\82Æ\82µ\82Ä\93o\8fê\82µ\82½\89ñ\90\94
27                 FImportantWord  : Integer;      //!< \92\8d\96Ú\92P\8cê\82Æ\82µ\82Ä\93o\8fê\82µ\82½\89ñ\90\94
28                 FNormalText                     : Integer;      //!< \92Ê\8fí\82Ì\92P\8cê\82Æ\82µ\82Ä\8aÜ\82Ü\82ê\82Ä\82¢\82½\95\8fÍ\82Ì\90\94
29                 FImportantText  : Integer;      //!< \92\8d\96Ú\92P\8cê\82Æ\82µ\82Ä\8aÜ\82Ü\82ê\82Ä\82¢\82½\95\8fÍ\82Ì\90\94
30
31         public
32                 property NormalWord                     : Integer       read FNormalWord write FNormalWord;
33                 property ImportantWord  : Integer       read FImportantWord write FImportantWord;
34                 property NormalText                     : Integer       read FNormalText write FNormalText;
35                 property ImportantText  : Integer       read FImportantText write FImportantText;
36         end;
37
38         {!***********************************************************
39         \brief \89ð\90Í\8dÏ\82Ý\92P\8cê\83v\83\8d\83p\83e\83B
40         ************************************************************}
41         TWordCountInfo  = class( TObject )
42         private
43                 FWordCount      :       Integer;        //!< \92P\8cê\90\94
44
45         public
46                 property WordCount      : Integer       read FWordCount write FWordCount;
47         end;
48
49         {!***********************************************************
50         \brief \89ð\90Í\8dÏ\82Ý\92P\8cê\83\8a\83X\83g
51         ************************************************************}
52 //      TWordCount      = class( THashedStringList )    // \8c\83\92x
53         TWordCount      = class( TStringList )  // \92x
54         public
55                 destructor Destroy; override;
56         end;
57
58         {!***********************************************************
59         \brief \83t\83B\83\8b\83^\83A\83\8b\83S\83\8a\83Y\83\80
60         ************************************************************}
61         TGikoBayesianAlgorithm =
62                 (gbaPaulGraham, gbaGaryRonbinson{, gbaGaryRonbinsonFisher});
63
64         {!***********************************************************
65         \brief \83x\83C\83W\83A\83\93\83t\83B\83\8b\83^
66         ************************************************************}
67         TGikoBayesian = class( THashedStringList )
68         private
69                 FFilePath       : string;       //!< \93Ç\82Ý\8d\9e\82ñ\82¾\83t\83@\83C\83\8b\83p\83X
70                 function GetObject( const name : string ) : TWordInfo;
71                 procedure SetObject( const name : string; value : TWordInfo );
72
73         public
74                 constructor Create;
75                 destructor Destroy; override;
76
77                 //! \83t\83@\83C\83\8b\82©\82ç\8aw\8fK\97\9a\97ð\82ð\93Ç\82Ý\8fo\82µ\82Ü\82·
78                 procedure LoadFromFile( const filePath : string );
79
80                 //! \83t\83@\83C\83\8b\82É\8aw\8fK\97\9a\97ð\82ð\95Û\91\82µ\82Ü\82·
81                 procedure SaveToFile( const filePath : string );
82
83                 //! \83t\83@\83C\83\8b\82É\8aw\8fK\97\9a\97ð\82ð\95Û\91\82µ\82Ü\82·
84                 procedure Save;
85
86                 //! \92P\8cê\82É\91Î\82·\82é\8fî\95ñ\82ð\8eæ\93¾\82µ\82Ü\82·
87                 property Objects[ const name : string ] : TWordInfo
88                         read GetObject write SetObject; default;
89
90                 //! \95\8fÍ\82É\8aÜ\82Ü\82ê\82é\92P\8cê\82ð\83J\83E\83\93\83g\82µ\82Ü\82·
91                 procedure CountWord(
92                         const text      : string;
93                         wordCount               : TWordCount );
94
95                 {!
96                 \brief  Paul Graham \96@\82É\8aî\82Ã\82¢\82Ä\95\8fÍ\82Ì\92\8d\96Ú\93x\82ð\8c\88\92è\82µ\82Ü\82·
97                 \return \95\8fÍ\82Ì\92\8d\96Ú\93x (\92\8d\96Ú\82É\92l\82µ\82È\82¢ 0.0\81`1.0 \92\8d\96Ú\82·\82×\82«)
98                 }
99                 function CalcPaulGraham( wordCount : TWordCount ) : Extended;
100
101                 {!
102                 \brief  GaryRobinson \96@\82É\8aî\82Ã\82¢\82Ä\95\8fÍ\82Ì\92\8d\96Ú\93x\82ð\8c\88\92è\82µ\82Ü\82·
103                 \return \95\8fÍ\82Ì\92\8d\96Ú\93x (\92\8d\96Ú\82É\92l\82µ\82È\82¢ 0.0\81`1.0 \92\8d\96Ú\82·\82×\82«)
104                 }
105                 function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
106
107 //              function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
108
109                 {!
110                 \brief  \95\8fÍ\82ð\89ð\90Í
111                 \param  text                                    \89ð\90Í\82·\82é\95\8fÍ
112                 \param  wordCount                       \89ð\90Í\82³\82ê\82½\92P\8cê\83\8a\83X\83g\82ª\95Ô\82é
113                 \param  algorithm                       \92\8d\96Ú\93x\82Ì\8c\88\92è\82É\97p\82¢\82é\83A\83\8b\83S\83\8a\83Y\83\80\82ð\8ew\92è\82µ\82Ü\82·
114                 \return \95\8fÍ\82Ì\92\8d\96Ú\93x (\92\8d\96Ú\82É\92l\82µ\82È\82¢ 0.0\81`1.0 \92\8d\96Ú\82·\82×\82«)
115
116                 CountWord \82Æ Calcxxxxx \82ð\82Ü\82Æ\82ß\82Ä\8eÀ\8ds\82·\82é\82¾\82¯\82Å\82·\81B
117                 }
118                 function Parse(
119                         const text                              : string;
120                         wordCount                                       : TWordCount;
121                         algorithm                                       : TGikoBayesianAlgorithm = gbaGaryRonbinson
122                 ) : Extended;
123
124                 {!
125                 \brief  \8aw\8fK\82·\82é
126                 \param  wordCount               Parse \82Å\89ð\90Í\82³\82ê\82½\92P\8cê\83\8a\83X\83g
127                 \param  isImportant \92\8d\96Ú\82·\82×\82«\95\8fÍ\82Æ\82µ\82Ä\8ao\82¦\82é\82È\82ç True
128                 }
129                 procedure Learn(
130                         wordCount                : TWordCount;
131                         isImportant      : Boolean );
132
133                 {!
134                 \brief          \8aw\8fK\8c\8b\89Ê\82ð\96Y\82ê\82é
135                 \param          wordCount               Parse \82Å\89ð\90Í\82³\82ê\82½\92P\8cê\83\8a\83X\83g
136                 \param          isImportant     \92\8d\96Ú\82·\82×\82«\95\8fÍ\82Æ\82µ\82Ä\8ao\82¦\82ç\82ê\82Ä\82¢\82½\82È\82ç True
137                 \warning        \8aw\8fK\8dÏ\82Ý\82Ì\95\8fÍ\82©\82Ç\82¤\82©\82Í\8am\94F\8fo\97\88\82Ü\82¹\82ñ\81B<br>
138                                                         Learn \82µ\82Ä\82¢\82È\82¢\95\8fÍ\82â isImportant \82ª\8aÔ\88á\82Á\82Ä\82¢\82é\95\8fÍ\82ð
139                                                         Forget \82·\82é\82Æ\83f\81[\83^\83x\81[\83X\82ª\94j\91¹\82µ\82Ü\82·\81B<br>
140                                                         \8aw\8fK\8dÏ\82Ý\82©\82Ç\82¤\82©\82Í\93Æ\8e©\82É\8aÇ\97\9d\82µ\82Ä\82­\82¾\82³\82¢\81B
141
142                 \91S\82Ä\82Ì\8aw\8fK\8c\8b\89Ê\82ð\83N\83\8a\83A\82·\82é\82í\82¯\82Å\82Í\82 \82è\82Ü\82¹\82ñ\81B<br>
143                 wordCount \82ð\93¾\82½\95\8fÍ (Parse \82Ì text \88ø\90\94\82Ì\8aw\8fK\8c\8b\89Ê\82Ì\82Ý\83N\83\8a\83A\82µ\82Ü\82·\81B<br><br>
144
145                 \8eå\82É\92\8d\96Ú\95\8fÍ\82Æ\94ñ\92\8d\96Ú\95\8fÍ\82ð\90Ø\82è\91Ö\82¦\82é\82½\82ß\82É Forget -> Learn \82Ì\8f\87\82Å\8eg\97p\82µ\82Ü\82·\81B
146                 }
147                 procedure       Forget(
148                         wordCount               : TWordCount;
149                         isImportant     : Boolean );
150         end;
151
152 //==================================================
153 implementation
154 //==================================================
155
156 uses
157         SysUtils, Math
158 {$IFDEF BENCHMARK}
159         , Windows, Dialogs
160 {$ENDIF}
161         ;
162
163 {$IFDEF BENCHMARK}
164 var
165         b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 : Int64; // benchmark
166 {$ENDIF}
167
168 const
169         GIKO_BAYESIAN_FILE_VERSION      = '1.0';
170         kYofKanji : TSysCharSet                 = [#$80..#$A0, #$E0..#$ff];
171
172 //************************************************************
173 // misc
174 //************************************************************
175
176 //==============================
177 // RemoveToken
178 //==============================
179 function RemoveToken(var s: string;const delimiter: string): string;
180 var
181         p: Integer;
182 begin
183         p := AnsiPos(delimiter, s);
184         if p = 0 then
185                 Result := s
186         else
187                 Result := Copy(s, 1, p - 1);
188         s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
189 end;
190
191 //==============================
192 // AbsSort
193 //==============================
194 function AbsSort( p1, p2 : Pointer ) : Integer;
195 var
196         v1, v2 : Single;
197 begin
198
199         v1 := Abs( Single( p1 ) - 0.5 );
200         v2 := Abs( Single( p2 ) - 0.5 );
201         if v1 > v2 then
202                 Result := -1
203         else if v1 = v2 then
204                 Result := 0
205         else
206                 Result := 1;
207
208 end;
209
210 //************************************************************
211 // TWordCount class
212 //************************************************************
213 destructor TWordCount.Destroy;
214 var
215         i : Integer;
216 begin
217
218         for i := Count - 1 downto 0 do
219                 if Objects[ i ] <> nil then
220                         Objects[ i ].Free;
221
222         inherited;
223
224 end;
225
226 //************************************************************
227 // TGikoBayesian class
228 //************************************************************
229
230 //==============================
231 // Create
232 //==============================
233 constructor TGikoBayesian.Create;
234 begin
235
236 {$IFDEF BENCHMARK}
237         b1:=0; b2:=0; b3:=0; b4:=0; b5:=0; b6:=0; b7:=0; b8:=0; b9:=0; b10:=0;
238 {$ENDIF}
239
240         Duplicates := dupIgnore;
241         Sorted := True;
242
243 end;
244
245 //==============================
246 // Destroy
247 //==============================
248 destructor TGikoBayesian.Destroy;
249 var
250         i : Integer;
251 begin
252
253         for i := Count - 1 downto 0 do
254                 if inherited Objects[ i ] <> nil then
255                         inherited Objects[ i ].Free;
256
257         inherited;
258
259 end;
260
261 procedure TGikoBayesian.LoadFromFile( const filePath : string );
262 var
263         i                       : Integer;
264         sl              : TStringList;
265         s                       : string;
266         name    : string;
267         info    : TWordInfo;
268 begin
269
270         FFilePath := filePath;
271
272         if not FileExists( filePath ) then
273                 Exit;
274
275         sl := TStringList.Create;
276         try
277                 sl.LoadFromFile( filePath );
278
279                 for i := 1 to sl.Count - 1 do begin
280                         s := sl[ i ];
281                         name := RemoveToken( s, #1 );
282                         info := TWordInfo.Create;
283                         info.NormalWord                 := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
284                         info.ImportantWord      := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
285                         info.NormalText                 := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
286                         info.ImportantText      := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
287
288                         AddObject( name, info );
289                 end;
290         finally
291                 sl.Free;
292         end;
293
294 end;
295
296 procedure TGikoBayesian.SaveToFile( const filePath : string );
297 var
298         i                       : Integer;
299         sl              : TStringList;
300         s                       : string;
301         info    : TWordInfo;
302 begin
303
304 {$IFDEF BENCHMARK}
305         ShowMessage(IntToStr(b1)+'/'+IntToStr(b2)+'/'+IntToStr(b3)+'/'+IntToStr(b4)+
306                 '/'+IntToStr(b5)+'/'+IntToStr(b6));
307 {$ENDIF}
308
309         FFilePath := filePath;
310
311         sl := TStringList.Create;
312         try
313                 sl.BeginUpdate;
314                 sl.Add( GIKO_BAYESIAN_FILE_VERSION );
315
316                 for i := 0 to Count - 1 do begin
317                         info := TWordInfo( inherited Objects[ i ] );
318                         s := Strings[ i ] + #1
319                                  + Format('%x', [info.NormalWord]) + #1
320                                  + Format('%x', [info.ImportantWord]) + #1
321                                  + Format('%x', [info.NormalText]) + #1
322                                  + Format('%x', [info.ImportantText]);
323
324                         sl.Add(s);
325                 end;
326                 sl.EndUpdate;
327                 sl.SaveToFile( filePath );
328         finally
329                 sl.Free;
330         end;
331
332 end;
333
334 procedure TGikoBayesian.Save;
335 begin
336
337         if FFilePath <> '' then
338                 SaveToFile( FFilePath );
339
340 end;
341
342 //==============================
343 // GetObject
344 //==============================
345 function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
346 var
347         idx : Integer;
348 begin
349
350         idx := IndexOf( name );
351         if idx < 0 then
352                 Result := nil
353         else
354                 Result := TWordInfo( inherited Objects[ idx ] );
355
356 end;
357
358 //==============================
359 // SetObject
360 //==============================
361 procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
362 var
363         idx : Integer;
364 begin
365
366         idx := IndexOf( name );
367         if idx < 0 then
368                 AddObject( name, value )
369         else
370                 inherited Objects[ idx ] := value;
371
372 end;
373
374
375 //==============================
376 // CountWord
377 //==============================
378 procedure TGikoBayesian.CountWord(
379         const text      : string;
380         wordCount               : TWordCount );
381 type
382         Modes                           = (ModeWhite, ModeGraph, ModeAlpha, ModeNum, ModeHanKana,
383                                                                 ModeWGraph, ModeWAlpha, ModeWNum,
384                                                                 ModeWHira, ModeWKata, ModeWKanji);
385 var
386         p, tail, last   : PChar;
387         mode, newMode   : Modes;
388         aWord                                   : string;
389         ch                                              : Longword;
390         chSize                          : Integer;
391         delimiter                       : TStringList;
392         delimited                       : Boolean;
393         i, idx                          : Integer;
394         countInfo                       : TWordCountInfo;
395 {$IFDEF BENCHMARK}
396         t1, t2                          : Int64;
397 {$ENDIF}
398 const
399         KAKUJOSI = '\82ð' + #10 + '\82É' + #10 + '\82ª' + #10 + '\82Æ' + #10 + '\82©\82ç' +
400                 #10 + '\82Å' + #10 + '\82Ö' + #10 + '\82æ\82è' + #10 + '\82Ü\82Å';
401 begin
402
403         delimiter := TStringList.Create;
404         try
405                 //*** \91¬\93x\83e\83X\83g\92\86
406                 wordCount.Duplicates := dupIgnore;
407                 wordCount.CaseSensitive := True;
408                 wordCount.Capacity := 1000;
409                 wordCount.Sorted := True;
410                 //***
411
412                 mode := ModeWhite;
413                 delimiter.Text := KAKUJOSI;
414                 SetLength( aWord, 256 );
415                 p                       := PChar( text );
416                 tail    := p + Length( text );
417                 last    := p;
418
419                 while p < tail do begin
420 {$IFDEF BENCHMARK}
421                         QueryPerformanceCounter( t1 );
422 {$ENDIF}
423                         delimited := False;
424                         // \95\8e\9a\82Ì\83^\83C\83v\82ð\94»\95Ê
425                         // \81¦\8bå\93Ç\93_\82Í ModeGraph \82É\82È\82é\82Ì\82Å\8cÂ\95Ê\82É\91Î\89\9e\82µ\82È\82­\82Ä\82à\82¢\82¢
426                         if p^ in kYofKanji then begin
427                                 if p + 1 < tail then begin
428                                         ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
429                                         case ch of
430                                         $8140:                                                  newMode := ModeWhite;
431                                         $8141..$824e:                           newMode := ModeWGraph;
432                                         $824f..$8258:                           newMode := ModeWNum;
433                                         $8260..$829a:                           newMode := ModeWAlpha;
434                                         $829f..$82f1:                           newMode := ModeWHira;
435                                         $8340..$8396:                           newMode := ModeWKata;
436                                         else                                                            newMode := ModeWKanji;
437                                         end;
438                                 end else begin
439                                         newMode := ModeWhite;
440                                 end;
441
442                                 chSize := 2;
443
444                                 // \8bæ\90Ø\82è\82É\82È\82é\95\8e\9a\82ª\82 \82é\82©\8c\9f\8d¸\82·\82é
445                                 if p + 3 < tail then begin      // 3 = delimiter \82Ì\8dÅ\91å\8e\9a\90\94 - 1
446                                         for i := 0 to delimiter.Count - 1 do begin
447                                                 if CompareMem(
448                                                         p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
449                                                         delimited := True;
450                                                         chSize := Length( delimiter[ i ] );
451                                                         Break;
452                                                 end;
453                                         end;
454                                 end;
455                         end else begin
456                                 case p^ of
457                                 #$0..#$20, #$7f:                                newMode := ModeWhite;
458                                 '0'..'9':                                                               newMode := ModeNum;
459                                 'a'..'z', 'A'..'Z':                     newMode := ModeAlpha;
460                                 #$A6..#$DD:                                                     newMode := ModeHanKana;
461                                 else                                                                            newMode := ModeGraph;
462                                 end;
463
464                                 chSize := 1;
465                         end;
466 {$IFDEF BENCHMARK}
467                         QueryPerformanceCounter( t2 );  b1 := b1 + (t2 - t1);
468 {$ENDIF}
469
470                         if (mode <> newMode) or delimited then begin
471
472                                 // \95\8e\9a\82Ì\83^\83C\83v\82ª\95Ï\8dX\82³\82ê\82½
473                                 // \82à\82µ\82­\82Í\8bæ\90Ø\82è\82É\82È\82é\95\8e\9a\82É\91\98\8bö\82µ\82½
474                                 if mode <> ModeWhite then begin
475 {$IFDEF BENCHMARK}
476                                         QueryPerformanceCounter( t1 );
477 {$ENDIF}
478                                         aWord := Copy( last, 0, p - last );     // \8c\83\92x
479 //                                      SetLength( aWord, p - last );
480 //                                      CopyMemory( PChar( aWord ), last, p - last );
481 {$IFDEF BENCHMARK}
482                                         QueryPerformanceCounter( t2 );  b2 := b2 + (t2 - t1);
483 {$ENDIF}
484                                         idx := wordCount.IndexOf( aWord );      // \8c\83\92x
485 {$IFDEF BENCHMARK}
486                                         QueryPerformanceCounter( t1 );  b3 := b3 + (t1 - t2);
487 {$ENDIF}
488                                         if idx < 0 then begin
489                                                 countInfo := TWordCountInfo.Create;
490                                                 wordCount.AddObject( aWord, countInfo );
491                                         end else begin
492                                                 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
493                                         end;
494                                         countInfo.WordCount := countInfo.WordCount + 1;
495 {$IFDEF BENCHMARK}
496                                         QueryPerformanceCounter( t2 );  b4 := b4 + (t2 - t1);
497 {$ENDIF}
498                                 end;
499
500                                 last := p;
501                                 mode := newMode;
502
503                         end;
504
505                         p := p + chSize;
506                 end;    // while
507
508                 if mode <> ModeWhite then begin
509                         aWord := Copy( last, 0, p - last );
510                         idx := wordCount.IndexOf( aWord );
511                         if idx < 0 then begin
512                                 countInfo := TWordCountInfo.Create;
513                                 wordCount.AddObject( aWord, countInfo );
514                         end else begin
515                                 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
516                         end;
517                         countInfo.WordCount := countInfo.WordCount + 1;
518                 end;
519         finally
520                 delimiter.Free;
521         end;
522
523 end;
524
525 //==============================
526 // CalcPaulGraham
527 //==============================
528 function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
529
530         function p( const aWord : string ) : Single;
531         var
532                 info : TWordInfo;
533         begin
534                 info := Objects[ aWord ];
535                 if info = nil then
536                         Result := 0.4
537                 else if info.NormalWord = 0 then
538                         Result := 0.99
539                 else if info.ImportantWord = 0 then
540                         Result := 0.01
541                 else
542                         Result := ( info.ImportantWord / info.ImportantText ) /
543                                 ((info.NormalWord * 2 / info.NormalText ) +
544                                  (info.ImportantWord / info.ImportantText));
545         end;
546
547 var
548         s, q                            : Extended;
549         i                                               : Integer;
550         narray                  : TList;
551 const
552         SAMPLE_COUNT    = 15;
553 begin
554
555         Result := 1;
556         if wordCount.Count = 0 then
557                 Exit;
558
559         narray := TList.Create;
560         try
561                 for i := 0 to wordCount.Count - 1 do begin
562                         narray.Add( Pointer( p( wordCount[ i ] ) ) );
563                 end;
564
565                 narray.Sort( AbsSort );
566
567                 s := 1;
568                 q := 1;
569                 i := min( SAMPLE_COUNT, narray.Count );
570                 while i > 0 do begin
571                         Dec( i );
572                         s := s * Single( narray[ i ] );
573                         q := q * (1 - Single( narray[ i ] ));
574                 end;
575
576                 Result := s / (s + q);
577         finally
578                 narray.Free;
579         end;
580
581 end;
582
583 //==============================
584 // CalcGaryRobinson
585 //==============================
586 function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
587
588         function p( const aWord : string ) : Single;
589         var
590                 info : TWordInfo;
591         begin
592                 info := Objects[ aWord ];
593                 if info = nil then
594                         Result := 0.415
595                 else if info.ImportantWord = 0 then
596                         Result := 0.0001
597                 else if info.NormalWord = 0 then
598                         Result := 0.9999
599                 else
600                         Result := ( info.ImportantWord / info.ImportantText ) /
601                                 ((info.NormalWord / info.NormalText ) +
602                                  (info.ImportantWord / info.ImportantText));
603         end;
604
605         function f( cnt : Integer; n, mean : Single ) : Extended;
606         const
607                 k = 0.00001;
608         begin
609                 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
610         end;
611
612 var
613         n                                               : Extended;
614         narray                  : array of Single;
615         mean                            : Extended;
616         countInfo               : TWordCountInfo;
617         i                                               : Integer;
618         normal                  : Extended;
619         important               : Extended;
620         cnt                                     : Extended;
621 begin
622
623         if wordCount.Count = 0 then begin
624                 Result := 1;
625                 Exit;
626         end;
627
628         SetLength( narray, wordCount.Count );
629         mean := 0;
630         for i := 0 to wordCount.Count - 1 do begin
631                 n                                               := p( wordCount[ i ] );
632                 narray[ i ]     := n;
633                 mean                            := mean + n;
634         end;
635         mean := mean / wordCount.Count;
636
637         cnt                             := 0;
638         normal          := 1;
639         important       := 1;
640         for i := 0 to wordCount.Count - 1 do begin
641                 countInfo       := TWordCountInfo( wordCount.Objects[ i ] );
642                 n                                               := f( countInfo.WordCount, narray[ i ], mean );
643                 normal                  := normal * n;
644                 important               := important * (1 - n);
645                 if countInfo <> nil then
646                         cnt                                     := cnt + countInfo.WordCount;
647         end;
648         if cnt = 0 then
649                 cnt := 1;
650         normal := 1 - Exp( Ln( normal ) * (1 / cnt) );
651         important := 1 - Exp( Ln( important ) * (1 / cnt) );
652
653         n := (important - normal+ 0.00001) / (important + normal + 0.00001);
654         Result := (1 + n) / 2;
655
656 end;
657
658 //==============================
659 // Parse
660 //==============================
661 function TGikoBayesian.Parse(
662         const text                              : string;
663         wordCount                                       : TWordCount;
664         algorithm                                       : TGikoBayesianAlgorithm = gbaGaryRonbinson
665 ) : Extended;
666 {$IFDEF BENCHMARK}
667 var
668         t1, t2  : Int64;
669 {$ENDIF}
670 begin
671
672 {$IFDEF BENCHMARK}
673         QueryPerformanceCounter( t1 );
674 {$ENDIF}
675         CountWord( text, wordCount );
676 {$IFDEF BENCHMARK}
677         QueryPerformanceCounter( t2 );  b5 := b5 + (t2 - t1);
678 {$ENDIF}
679         case algorithm of
680         gbaPaulGraham:          Result := CalcPaulGraham( wordCount );
681         gbaGaryRonbinson:       Result := CalcGaryRobinson( wordCount );
682         else                                                    Result := 0;
683         end;
684 {$IFDEF BENCHMARK}
685         QueryPerformanceCounter( t1 );  b6 := b6 + (t1 - t2);
686 {$ENDIF}
687
688 end;
689
690 //==============================
691 // Learn
692 //==============================
693 procedure TGikoBayesian.Learn(
694         wordCount                : TWordCount;
695         isImportant      : Boolean );
696 var
697         aWord                   : string;
698         wordinfo        : TWordInfo;
699         countinfo       : TWordCountInfo;
700         i                       : Integer;
701 begin
702
703         for i := 0 to wordCount.Count - 1 do begin
704                 aWord := wordCount[ i ];
705                 wordinfo := Objects[ aWord ];
706                 if wordinfo = nil then begin
707                         wordinfo := TWordInfo.Create;
708                         Objects[ aWord ] := wordinfo;
709                 end;
710
711                 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
712                 if isImportant then begin
713                         wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
714                         wordinfo.ImportantText := wordinfo.ImportantText + 1;
715                 end else begin
716                         wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
717                         wordinfo.NormalText := wordinfo.NormalText + 1;
718                 end;
719         end;
720
721 end;
722
723 //==============================
724 // Forget
725 //==============================
726 procedure       TGikoBayesian.Forget(
727         wordCount               : TWordCount;
728         isImportant     : Boolean );
729 var
730         aWord                   : string;
731         wordinfo        : TWordInfo;
732         countinfo       : TWordCountInfo;
733         i                       : Integer;
734 begin
735
736         for i := 0 to wordCount.Count - 1 do begin
737                 aWord := wordCount[ i ];
738                 wordinfo := Objects[ aWord ];
739                 if wordinfo = nil then
740                         Continue;
741
742                 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
743                 if isImportant then begin
744                         wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
745                         wordinfo.ImportantText := wordinfo.ImportantText - 1;
746                 end else begin
747                         wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
748                         wordinfo.NormalText := wordinfo.NormalText - 1;
749                 end;
750         end;
751
752 end;
753
754 end.