OSDN Git Service

スパムフィルタの導入。
[gikonavigoeson/gikonavi.git] / GikoBayesian.pas
1 unit GikoBayesian;
2
3 {!
4 \file           GikoBayesian.pas
5 \brief  \83x\83C\83W\83A\83\93\83t\83B\83\8b\83^
6
7 $Id: GikoBayesian.pas,v 1.1 2004/10/20 18:25:00 yoffy Exp $
8 }
9
10 interface
11
12 //==================================================
13 uses
14 //==================================================
15         Classes, IniFiles;
16
17 //==================================================
18 type
19 //==================================================
20
21         {!***********************************************************
22         \brief \92P\8cê\83v\83\8d\83p\83e\83B
23         ************************************************************}
24         TWordInfo       = class( TObject )
25         private
26                 FNormalWord                     :       Integer;        //!< \92Ê\8fí\82Ì\92P\8cê\82Æ\82µ\82Ä\93o\8fê\82µ\82½\89ñ\90\94
27                 FImportantWord  : Integer;      //!< \92\8d\96Ú\92P\8cê\82Æ\82µ\82Ä\93o\8fê\82µ\82½\89ñ\90\94
28                 FNormalText                     : Integer;      //!< \92Ê\8fí\82Ì\92P\8cê\82Æ\82µ\82Ä\8aÜ\82Ü\82ê\82Ä\82¢\82½\95\8fÍ\82Ì\90\94
29                 FImportantText  : Integer;      //!< \92\8d\96Ú\92P\8cê\82Æ\82µ\82Ä\8aÜ\82Ü\82ê\82Ä\82¢\82½\95\8fÍ\82Ì\90\94
30
31         public
32                 property NormalWord                     : Integer       read FNormalWord write FNormalWord;
33                 property ImportantWord  : Integer       read FImportantWord write FImportantWord;
34                 property NormalText                     : Integer       read FNormalText write FNormalText;
35                 property ImportantText  : Integer       read FImportantText write FImportantText;
36         end;
37
38         {!***********************************************************
39         \brief \89ð\90Í\8dÏ\82Ý\92P\8cê\83v\83\8d\83p\83e\83B
40         ************************************************************}
41         TWordCountInfo  = class( TObject )
42         private
43                 FWordCount      :       Integer;        //!< \92P\8cê\90\94
44
45         public
46                 property WordCount      : Integer       read FWordCount write FWordCount;
47         end;
48
49         {!***********************************************************
50         \brief \89ð\90Í\8dÏ\82Ý\92P\8cê\83\8a\83X\83g
51         ************************************************************}
52 //      TWordCount      = class( THashedStringList )    // \8c\83\92x
53         TWordCount      = class( TStringList )  // \92x
54         public
55                 destructor Destroy; override;
56         end;
57
58         {!***********************************************************
59         \brief \83t\83B\83\8b\83^\83A\83\8b\83S\83\8a\83Y\83\80
60         ************************************************************}
61         TGikoBayesianAlgorithm =
62                 (gbaPaulGraham, gbaGaryRonbinson{, gbaGaryRonbinsonFisher});
63
64         {!***********************************************************
65         \brief \83x\83C\83W\83A\83\93\83t\83B\83\8b\83^
66         ************************************************************}
67         TGikoBayesian = class( THashedStringList )
68         private
69                 FFilePath       : string;       //!< \93Ç\82Ý\8d\9e\82ñ\82¾\83t\83@\83C\83\8b\83p\83X
70                 function GetObject( const name : string ) : TWordInfo;
71                 procedure SetObject( const name : string; value : TWordInfo );
72
73         public
74                 constructor Create;
75                 destructor Destroy; override;
76
77                 //! \83t\83@\83C\83\8b\82©\82ç\8aw\8fK\97\9a\97ð\82ð\93Ç\82Ý\8fo\82µ\82Ü\82·
78                 procedure LoadFromFile( const filePath : string );
79
80                 //! \83t\83@\83C\83\8b\82É\8aw\8fK\97\9a\97ð\82ð\95Û\91\82µ\82Ü\82·
81                 procedure SaveToFile( const filePath : string );
82
83                 //! \83t\83@\83C\83\8b\82É\8aw\8fK\97\9a\97ð\82ð\95Û\91\82µ\82Ü\82·
84                 procedure Save;
85
86                 //! \92P\8cê\82É\91Î\82·\82é\8fî\95ñ\82ð\8eæ\93¾\82µ\82Ü\82·
87                 property Objects[ const name : string ] : TWordInfo
88                         read GetObject write SetObject; default;
89
90                 //! \95\8fÍ\82É\8aÜ\82Ü\82ê\82é\92P\8cê\82ð\83J\83E\83\93\83g\82µ\82Ü\82·
91                 procedure CountWord(
92                         const text      : string;
93                         wordCount               : TWordCount );
94
95                 {!
96                 \brief  Paul Graham \96@\82É\8aî\82Ã\82¢\82Ä\95\8fÍ\82Ì\92\8d\96Ú\93x\82ð\8c\88\92è\82µ\82Ü\82·
97                 \return \95\8fÍ\82Ì\92\8d\96Ú\93x (\92\8d\96Ú\82É\92l\82µ\82È\82¢ 0.0\81`1.0 \92\8d\96Ú\82·\82×\82«)
98                 }
99                 function CalcPaulGraham( wordCount : TWordCount ) : Extended;
100
101                 {!
102                 \brief  GaryRobinson \96@\82É\8aî\82Ã\82¢\82Ä\95\8fÍ\82Ì\92\8d\96Ú\93x\82ð\8c\88\92è\82µ\82Ü\82·
103                 \return \95\8fÍ\82Ì\92\8d\96Ú\93x (\92\8d\96Ú\82É\92l\82µ\82È\82¢ 0.0\81`1.0 \92\8d\96Ú\82·\82×\82«)
104                 }
105                 function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
106
107 //              function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
108
109                 {!
110                 \brief  \95\8fÍ\82ð\89ð\90Í
111                 \param  text                                    \89ð\90Í\82·\82é\95\8fÍ
112                 \param  wordCount                       \89ð\90Í\82³\82ê\82½\92P\8cê\83\8a\83X\83g\82ª\95Ô\82é
113                 \param  algorithm                       \92\8d\96Ú\93x\82Ì\8c\88\92è\82É\97p\82¢\82é\83A\83\8b\83S\83\8a\83Y\83\80\82ð\8ew\92è\82µ\82Ü\82·
114                 \return \95\8fÍ\82Ì\92\8d\96Ú\93x (\92\8d\96Ú\82É\92l\82µ\82È\82¢ 0.0\81`1.0 \92\8d\96Ú\82·\82×\82«)
115
116                 CountWord \82Æ Calcxxxxx \82ð\82Ü\82Æ\82ß\82Ä\8eÀ\8ds\82·\82é\82¾\82¯\82Å\82·\81B
117                 }
118                 function Parse(
119                         const text                              : string;
120                         wordCount                                       : TWordCount;
121                         algorithm                                       : TGikoBayesianAlgorithm = gbaGaryRonbinson
122                 ) : Extended;
123
124                 {!
125                 \brief  \8aw\8fK\82·\82é
126                 \param  wordCount               Parse \82Å\89ð\90Í\82³\82ê\82½\92P\8cê\83\8a\83X\83g
127                 \param  isImportant \92\8d\96Ú\82·\82×\82«\95\8fÍ\82Æ\82µ\82Ä\8ao\82¦\82é\82È\82ç True
128                 }
129                 procedure Learn(
130                         wordCount                : TWordCount;
131                         isImportant      : Boolean );
132
133                 {!
134                 \brief          \8aw\8fK\8c\8b\89Ê\82ð\96Y\82ê\82é
135                 \param          wordCount               Parse \82Å\89ð\90Í\82³\82ê\82½\92P\8cê\83\8a\83X\83g
136                 \param          isImportant     \92\8d\96Ú\82·\82×\82«\95\8fÍ\82Æ\82µ\82Ä\8ao\82¦\82ç\82ê\82Ä\82¢\82½\82È\82ç True
137                 \warning        \8aw\8fK\8dÏ\82Ý\82Ì\95\8fÍ\82©\82Ç\82¤\82©\82Í\8am\94F\8fo\97\88\82Ü\82¹\82ñ\81B<br>
138                                                         Learn \82µ\82Ä\82¢\82È\82¢\95\8fÍ\82â isImportant \82ª\8aÔ\88á\82Á\82Ä\82¢\82é\95\8fÍ\82ð
139                                                         Forget \82·\82é\82Æ\83f\81[\83^\83x\81[\83X\82ª\94j\91¹\82µ\82Ü\82·\81B<br>
140                                                         \8aw\8fK\8dÏ\82Ý\82©\82Ç\82¤\82©\82Í\93Æ\8e©\82É\8aÇ\97\9d\82µ\82Ä\82­\82¾\82³\82¢\81B
141
142                 \91S\82Ä\82Ì\8aw\8fK\8c\8b\89Ê\82ð\83N\83\8a\83A\82·\82é\82í\82¯\82Å\82Í\82 \82è\82Ü\82¹\82ñ\81B<br>
143                 wordCount \82ð\93¾\82½\95\8fÍ (Parse \82Ì text \88ø\90\94\82Ì\8aw\8fK\8c\8b\89Ê\82Ì\82Ý\83N\83\8a\83A\82µ\82Ü\82·\81B<br><br>
144
145                 \8eå\82É\92\8d\96Ú\95\8fÍ\82Æ\94ñ\92\8d\96Ú\95\8fÍ\82ð\90Ø\82è\91Ö\82¦\82é\82½\82ß\82É Forget -> Learn \82Ì\8f\87\82Å\8eg\97p\82µ\82Ü\82·\81B
146                 }
147                 procedure       Forget(
148                         wordCount               : TWordCount;
149                         isImportant     : Boolean );
150         end;
151
152 //==================================================
153 implementation
154 //==================================================
155
156 uses
157         SysUtils, Math;
158
159 const
160         GIKO_BAYESIAN_FILE_VERSION      = '1.0';
161         kYofKanji : TSysCharSet                 = [#$80..#$A0, #$E0..#$ff];
162
163 //************************************************************
164 // misc
165 //************************************************************
166
167 //==============================
168 // RemoveToken
169 //==============================
170 function RemoveToken(var s: string;const delimiter: string): string;
171 var
172         p: Integer;
173 begin
174         p := AnsiPos(delimiter, s);
175         if p = 0 then
176                 Result := s
177         else
178                 Result := Copy(s, 1, p - 1);
179         s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
180 end;
181
182 //==============================
183 // AbsSort
184 //==============================
185 function AbsSort( p1, p2 : Pointer ) : Integer;
186 var
187         v1, v2 : Single;
188 begin
189
190         v1 := Abs( Single( p1 ) - 0.5 );
191         v2 := Abs( Single( p2 ) - 0.5 );
192         if v1 > v2 then
193                 Result := -1
194         else if v1 = v2 then
195                 Result := 0
196         else
197                 Result := 1;
198
199 end;
200
201 //************************************************************
202 // TWordCount class
203 //************************************************************
204 destructor TWordCount.Destroy;
205 var
206         i : Integer;
207 begin
208
209         for i := Count - 1 downto 0 do
210                 if Objects[ i ] <> nil then
211                         Objects[ i ].Free;
212
213         inherited;
214
215 end;
216
217 //************************************************************
218 // TGikoBayesian class
219 //************************************************************
220
221 //==============================
222 // Create
223 //==============================
224 constructor TGikoBayesian.Create;
225 begin
226
227         Duplicates := dupIgnore;
228         Sorted := True;
229
230 end;
231
232 //==============================
233 // Destroy
234 //==============================
235 destructor TGikoBayesian.Destroy;
236 var
237         i : Integer;
238 begin
239
240         for i := Count - 1 downto 0 do
241                 if inherited Objects[ i ] <> nil then
242                         inherited Objects[ i ].Free;
243
244         inherited;
245
246 end;
247
248 procedure TGikoBayesian.LoadFromFile( const filePath : string );
249 var
250         i                       : Integer;
251         sl              : TStringList;
252         s                       : string;
253         name    : string;
254         info    : TWordInfo;
255 begin
256
257         if not FileExists( filePath ) then
258                 Exit;
259                 
260         sl := TStringList.Create;
261         try
262                 sl.LoadFromFile( filePath );
263
264                 for i := 1 to sl.Count - 1 do begin
265                         s := sl[ i ];
266                         name := RemoveToken( s, #1 );
267                         info := TWordInfo.Create;
268                         info.NormalWord                 := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
269                         info.ImportantWord      := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
270                         info.NormalText                 := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
271                         info.ImportantText      := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
272
273                         AddObject( name, info );
274                 end;
275         finally
276                 sl.Free;
277         end;
278
279 end;
280
281 procedure TGikoBayesian.SaveToFile( const filePath : string );
282 var
283         i                       : Integer;
284         sl              : TStringList;
285         s                       : string;
286         info    : TWordInfo;
287 begin
288
289         sl := TStringList.Create;
290         try
291                 sl.BeginUpdate;
292                 sl.Add( GIKO_BAYESIAN_FILE_VERSION );
293
294                 for i := 0 to Count - 1 do begin
295                         info := TWordInfo( inherited Objects[ i ] );
296                         s := Strings[ i ] + #1
297                                  + Format('%x', [info.NormalWord]) + #1
298                                  + Format('%x', [info.ImportantWord]) + #1
299                                  + Format('%x', [info.NormalText]) + #1
300                                  + Format('%x', [info.ImportantText]);
301
302                         sl.Add(s);
303                 end;
304                 sl.EndUpdate;
305                 sl.SaveToFile( filePath );
306         finally
307                 sl.Free;
308         end;
309
310 end;
311
312 procedure TGikoBayesian.Save;
313 begin
314
315         if FFilePath <> '' then
316                 SaveToFile( FFilePath );
317
318 end;
319
320 //==============================
321 // GetObject
322 //==============================
323 function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
324 var
325         idx : Integer;
326 begin
327
328         idx := IndexOf( name );
329         if idx < 0 then
330                 Result := nil
331         else
332                 Result := TWordInfo( inherited Objects[ idx ] );
333
334 end;
335
336 //==============================
337 // SetObject
338 //==============================
339 procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
340 var
341         idx : Integer;
342 begin
343
344         idx := IndexOf( name );
345         if idx < 0 then
346                 AddObject( name, value )
347         else
348                 inherited Objects[ idx ] := value;
349
350 end;
351
352
353 //==============================
354 // CountWord
355 //==============================
356 procedure TGikoBayesian.CountWord(
357         const text      : string;
358         wordCount               : TWordCount );
359 type
360         Modes                           = (ModeWhite, ModeGraph, ModeAlpha, ModeNum, ModeHanKana,
361                                                                 ModeWGraph, ModeWAlpha, ModeWNum,
362                                                                 ModeWHira, ModeWKata, ModeWKanji);
363 var
364         p, tail, last   : PChar;
365         mode, newMode   : Modes;
366         aWord                                   : string;
367         ch                                              : Longword;
368         chSize                          : Integer;
369         delimiter                       : TStringList;
370         delimited                       : Boolean;
371         i, idx                          : Integer;
372         countInfo                       : TWordCountInfo;
373 const
374         KAKUJOSI = '\82ð' + #10 + '\82É' + #10 + '\82ª' + #10 + '\82Æ' + #10 + '\82©\82ç' +
375                 #10 + '\82Å' + #10 + '\82Ö' + #10 + '\82æ\82è' + #10 + '\82Ü\82Å';
376 begin
377
378         delimiter := TStringList.Create;
379         try
380                 //*** \91¬\93x\83e\83X\83g\92\86
381                 wordCount.Duplicates := dupIgnore;
382                 wordCount.CaseSensitive := True;
383                 wordCount.Capacity := 1000;
384                 wordCount.Sorted := True;
385                 //***
386
387                 mode := ModeWhite;
388                 delimiter.Text := KAKUJOSI;
389                 SetLength( aWord, 256 );
390                 p                       := PChar( text );
391                 tail    := p + Length( text );
392                 last    := p;
393
394                 while p < tail do begin
395                         delimited := False;
396                         // \95\8e\9a\82Ì\83^\83C\83v\82ð\94»\95Ê
397                         // \81¦\8bå\93Ç\93_\82Í ModeGraph \82É\82È\82é\82Ì\82Å\8cÂ\95Ê\82É\91Î\89\9e\82µ\82È\82­\82Ä\82à\82¢\82¢
398                         if p^ in kYofKanji then begin
399                                 if p + 1 < tail then begin
400                                         ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
401                                         case ch of
402                                         $8140:                                                  newMode := ModeWhite;
403                                         $8141..$824e:                           newMode := ModeWGraph;
404                                         $824f..$8258:                           newMode := ModeWNum;
405                                         $8260..$829a:                           newMode := ModeWAlpha;
406                                         $829f..$82f1:                           newMode := ModeWHira;
407                                         $8340..$8396:                           newMode := ModeWKata;
408                                         else                                                            newMode := ModeWKanji;
409                                         end;
410                                 end else begin
411                                         newMode := ModeWhite;
412                                 end;
413
414                                 chSize := 2;
415
416                                 // \8bæ\90Ø\82è\82É\82È\82é\95\8e\9a\82ª\82 \82é\82©\8c\9f\8d¸\82·\82é
417                                 if p + 3 < tail then begin      // 3 = delimiter \82Ì\8dÅ\91å\8e\9a\90\94 - 1
418                                         for i := 0 to delimiter.Count - 1 do begin
419                                                 if CompareMem(
420                                                         p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
421                                                         delimited := True;
422                                                         chSize := Length( delimiter[ i ] );
423                                                         Break;
424                                                 end;
425                                         end;
426                                 end;
427                         end else begin
428                                 case p^ of
429                                 #$0..#$20, #$7f:                                newMode := ModeWhite;
430                                 '0'..'9':                                                               newMode := ModeNum;
431                                 'a'..'z', 'A'..'Z':                     newMode := ModeAlpha;
432                                 #$A6..#$DD:                                                     newMode := ModeHanKana;
433                                 else                                                                            newMode := ModeGraph;
434                                 end;
435
436                                 chSize := 1;
437                         end;
438
439                         if (mode <> newMode) or delimited then begin
440
441                                 // \95\8e\9a\82Ì\83^\83C\83v\82ª\95Ï\8dX\82³\82ê\82½
442                                 // \82à\82µ\82­\82Í\8bæ\90Ø\82è\82É\82È\82é\95\8e\9a\82É\91\98\8bö\82µ\82½
443                                 if mode <> ModeWhite then begin
444                                         aWord := Copy( last, 0, p - last );     // \8c\83\92x
445 //                                      SetLength( aWord, p - last );
446 //                                      CopyMemory( PChar( aWord ), last, p - last );
447                                         idx := wordCount.IndexOf( aWord );      // \8c\83\92x
448                                         if idx < 0 then begin
449                                                 countInfo := TWordCountInfo.Create;
450                                                 wordCount.AddObject( aWord, countInfo );
451                                         end else begin
452                                                 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
453                                         end;
454                                         countInfo.WordCount := countInfo.WordCount + 1;
455                                 end;
456
457                                 last := p;
458                                 mode := newMode;
459
460                         end;
461
462                         p := p + chSize;
463                 end;    // while
464
465                 if mode <> ModeWhite then begin
466                         aWord := Copy( last, 0, p - last );
467                         idx := wordCount.IndexOf( aWord );
468                         if idx < 0 then begin
469                                 countInfo := TWordCountInfo.Create;
470                                 wordCount.AddObject( aWord, countInfo );
471                         end else begin
472                                 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
473                         end;
474                         countInfo.WordCount := countInfo.WordCount + 1;
475                 end;
476         finally
477                 delimiter.Free;
478         end;
479
480 end;
481
482 //==============================
483 // CalcPaulGraham
484 //==============================
485 function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
486
487         function p( const aWord : string ) : Single;
488         var
489                 info : TWordInfo;
490         begin
491                 info := Objects[ aWord ];
492                 if info = nil then
493                         Result := 0.4
494                 else if info.NormalWord = 0 then
495                         Result := 0.99
496                 else if info.ImportantWord = 0 then
497                         Result := 0.01
498                 else
499                         Result := ( info.ImportantWord / info.ImportantText ) /
500                                 ((info.NormalWord * 2 / info.NormalText ) +
501                                  (info.ImportantWord / info.ImportantText));
502         end;
503
504 var
505         s, q                            : Extended;
506         i                                               : Integer;
507         narray                  : TList;
508 const
509         SAMPLE_COUNT    = 15;
510 begin
511
512         Result := 1;
513         if wordCount.Count = 0 then
514                 Exit;
515
516         narray := TList.Create;
517         try
518                 for i := 0 to wordCount.Count - 1 do begin
519                         narray.Add( Pointer( p( wordCount[ i ] ) ) );
520                 end;
521
522                 narray.Sort( AbsSort );
523
524                 s := 1;
525                 q := 1;
526                 i := min( SAMPLE_COUNT, narray.Count );
527                 while i > 0 do begin
528                         Dec( i );
529                         s := s * Single( narray[ i ] );
530                         q := q * (1 - Single( narray[ i ] ));
531                 end;
532
533                 Result := s / (s + q);
534         finally
535                 narray.Free;
536         end;
537
538 end;
539
540 //==============================
541 // CalcGaryRobinson
542 //==============================
543 function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
544
545         function p( const aWord : string ) : Single;
546         var
547                 info : TWordInfo;
548         begin
549                 info := Objects[ aWord ];
550                 if info = nil then
551                         Result := 0.415
552                 else if info.ImportantWord = 0 then
553                         Result := 0.0001
554                 else if info.NormalWord = 0 then
555                         Result := 0.9999
556                 else
557                         Result := ( info.ImportantWord / info.ImportantText ) /
558                                 ((info.NormalWord / info.NormalText ) +
559                                  (info.ImportantWord / info.ImportantText));
560         end;
561
562         function f( cnt : Integer; n, mean : Single ) : Extended;
563         const
564                 k = 0.00001;
565         begin
566                 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
567         end;
568
569 var
570         n                                               : Extended;
571         narray                  : array of Single;
572         mean                            : Extended;
573         countInfo               : TWordCountInfo;
574         i                                               : Integer;
575         normal                  : Extended;
576         important               : Extended;
577         cnt                                     : Extended;
578 begin
579
580         if wordCount.Count = 0 then begin
581                 Result := 1;
582                 Exit;
583         end;
584
585         SetLength( narray, wordCount.Count );
586         mean := 0;
587         for i := 0 to wordCount.Count - 1 do begin
588                 n                                               := p( wordCount[ i ] );
589                 narray[ i ]     := n;
590                 mean                            := mean + n;
591         end;
592         mean := mean / wordCount.Count;
593
594         cnt                             := 0;
595         normal          := 1;
596         important       := 1;
597         for i := 0 to wordCount.Count - 1 do begin
598                 countInfo       := TWordCountInfo( wordCount.Objects[ i ] );
599                 n                                               := f( countInfo.WordCount, narray[ i ], mean );
600                 normal                  := normal * n;
601                 important               := important * (1 - n);
602                 if countInfo <> nil then
603                         cnt                                     := cnt + countInfo.WordCount;
604         end;
605         if cnt = 0 then
606                 cnt := 1;
607         normal := 1 - Exp( Ln( normal ) * (1 / cnt) );
608         important := 1 - Exp( Ln( important ) * (1 / cnt) );
609
610         n := (important - normal+ 0.00001) / (important + normal + 0.00001);
611         Result := (1 + n) / 2;
612
613 end;
614
615 //==============================
616 // Parse
617 //==============================
618 function TGikoBayesian.Parse(
619         const text                              : string;
620         wordCount                                       : TWordCount;
621         algorithm                                       : TGikoBayesianAlgorithm = gbaGaryRonbinson
622 ) : Extended;
623 begin
624
625         CountWord( text, wordCount );
626         case algorithm of
627         gbaPaulGraham:          Result := CalcPaulGraham( wordCount );
628         gbaGaryRonbinson:       Result := CalcGaryRobinson( wordCount );
629         else                                                    Result := 0;
630         end;
631
632 end;
633
634 //==============================
635 // Learn
636 //==============================
637 procedure TGikoBayesian.Learn(
638         wordCount                : TWordCount;
639         isImportant      : Boolean );
640 var
641         aWord                   : string;
642         wordinfo        : TWordInfo;
643         countinfo       : TWordCountInfo;
644         i                       : Integer;
645 begin
646
647         for i := 0 to wordCount.Count - 1 do begin
648                 aWord := wordCount[ i ];
649                 wordinfo := Objects[ aWord ];
650                 if wordinfo = nil then begin
651                         wordinfo := TWordInfo.Create;
652                         Objects[ aWord ] := wordinfo;
653                 end;
654
655                 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
656                 if isImportant then begin
657                         wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
658                         wordinfo.ImportantText := wordinfo.ImportantText + 1;
659                 end else begin
660                         wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
661                         wordinfo.NormalText := wordinfo.NormalText + 1;
662                 end;
663         end;
664
665 end;
666
667 //==============================
668 // Forget
669 //==============================
670 procedure       TGikoBayesian.Forget(
671         wordCount               : TWordCount;
672         isImportant     : Boolean );
673 var
674         aWord                   : string;
675         wordinfo        : TWordInfo;
676         countinfo       : TWordCountInfo;
677         i                       : Integer;
678 begin
679
680         for i := 0 to wordCount.Count - 1 do begin
681                 aWord := wordCount[ i ];
682                 wordinfo := Objects[ aWord ];
683                 if wordinfo = nil then
684                         Continue;
685
686                 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
687                 if isImportant then begin
688                         wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
689                         wordinfo.ImportantText := wordinfo.ImportantText - 1;
690                 end else begin
691                         wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
692                         wordinfo.NormalText := wordinfo.NormalText - 1;
693                 end;
694         end;
695
696 end;
697
698 end.