5 \brief Bayesian filter
7 $Id: GikoBayesian.pas,v 1.9 2004/10/27 00:10:12 yoffy Exp $
12 //==================================================
14 //==================================================
17 //==================================================
19 //==================================================
21 {!***********************************************************
22 \brief Word properties
23 ************************************************************}
24 TWordInfo = class( TObject )
26 FNormalWord : Integer; //!< Number of times the word appeared as a normal word
27 FImportantWord : Integer; //!< Number of times the word appeared as an important (noteworthy) word
28 FNormalText : Integer; //!< Number of texts that contained the word as a normal word
29 FImportantText : Integer; //!< Number of texts that contained the word as an important (noteworthy) word
32 property NormalWord : Integer read FNormalWord write FNormalWord;
33 property ImportantWord : Integer read FImportantWord write FImportantWord;
34 property NormalText : Integer read FNormalText write FNormalText;
35 property ImportantText : Integer read FImportantText write FImportantText;
38 {!***********************************************************
39 \brief Properties of an analyzed word
40 ************************************************************}
41 TWordCountInfo = class( TObject )
43 FWordCount : Integer; //!< Word count
46 property WordCount : Integer read FWordCount write FWordCount;
49 {!***********************************************************
50 \brief List of analyzed words
51 ************************************************************}
52 // TWordCount = class( THashedStringList ) // extremely slow
53 TWordCount = class( TStringList )
56 destructor Destroy; override;
59 {!***********************************************************
60 \brief Filter algorithm
61 ************************************************************}
62 TGikoBayesianAlgorithm =
63 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
65 {!***********************************************************
66 \brief Bayesian filter
67 ************************************************************}
68 // TGikoBayesian = class( THashedStringList ) // extremely slow
69 TGikoBayesian = class( TStringList )
71 FFilePath : string; //!< Path of the file that was loaded
72 function GetObject( const name : string ) : TWordInfo;
73 procedure SetObject( const name : string; value : TWordInfo );
77 destructor Destroy; override;
79 //! Loads the learning history from a file
80 procedure LoadFromFile( const filePath : string );
82 //! Saves the learning history to a file
83 procedure SaveToFile( const filePath : string );
85 //! Saves the learning history to a file
88 //! Gets the information associated with a word
89 property Objects[ const name : string ] : TWordInfo
90 read GetObject write SetObject; default;
92 //! Counts the words contained in a text
95 wordCount : TWordCount );
98 \brief Determines the attention score of a text based on the Paul Graham method
99 \return Attention score of the text (not worth attention 0.0 ~ 1.0 deserves attention)
101 function CalcPaulGraham( wordCount : TWordCount ) : Extended;
104 \brief Determines the attention score of a text based on the GaryRobinson method
105 \return Attention score of the text (not worth attention 0.0 ~ 1.0 deserves attention)
107 function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
110 \brief Determines the attention score of a text based on the GaryRobinson-Fisher method
111 \return Attention score of the text (not worth attention 0.0 ~ 1.0 deserves attention)
113 function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
116 \brief Analyzes a text
117 \param text Text to analyze
118 \param wordCount Receives the list of analyzed words
119 \param algorithm Specifies the algorithm used to determine the attention score
120 \return Attention score of the text (not worth attention 0.0 ~ 1.0 deserves attention)
122 This simply runs CountWord and Calcxxxxx together.
126 wordCount : TWordCount;
127 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
131 \brief Learns a text
132 \param wordCount Word list analyzed by Parse
133 \param isImportant True to remember the text as one that deserves attention
136 wordCount : TWordCount;
137 isImportant : Boolean );
140 \brief Forgets a learning result
141 \param wordCount Word list analyzed by Parse
142 \param isImportant True if the text had been remembered as one that deserves attention
143 \warning Whether the text has actually been learned cannot be verified.<br>
144 Calling Forget for a text that was never passed to Learn, or for a text whose isImportant is wrong,
145 corrupts the database.<br>
146 Keep track on your own of whether a text has been learned.
148 This does not clear all learning results.<br>
149 Only the learning result of the text from which wordCount was obtained (the text argument of Parse) is cleared.<br><br>
151 It is mainly used in the order Forget -> Learn to switch a text between "attention" and "non-attention".
154 wordCount : TWordCount;
155 isImportant : Boolean );
158 //==================================================
160 //==================================================
163 SysUtils, Math, Windows;
// Version string; SaveToFile writes it as the first line of the saved file.
166 GIKO_BAYESIAN_FILE_VERSION = '1.0';
// Character classes used while splitting text into words (CountWord declares an identical local type).
168 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
169 ModeWGraph, ModeWAlpha, ModeWNum,
170 ModeWHira, ModeWKata, ModeWKanji);
// Lookup table indexed by byte value 0..255; CountWord casts the entry to Modes
// for bytes that are not in its kKanji lead-byte set.
// NOTE(review): in the $30..$3F row only nine entries are 2, so '9' ($39) is classed 1
//               while '0'..'8' are classed 2 - looks like a typo, confirm before changing
//               (changing it alters how text is split into words).
// NOTE(review): the table gives 2 to digits, 3 to ASCII letters and 4 to bytes $A5..$DF, whereas the
//               enum ordinals 2/3/4 are named ModeAlpha/ModeHanKana/ModeNum - the names do not line up;
//               presumably harmless because modes are only compared with each other. Confirm.
172 CharMode1 : array [ 0..255 ] of Byte =
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
178 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
179 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
180 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
181 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185 0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
186 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
187 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
188 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
189 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
193 //************************************************************
195 //************************************************************
197 //==============================
199 //==============================
// Removes the leading token from s and returns it.
// The visible code locates delimiter with AnsiPos, returns the text in front of it,
// and assigns to s everything that follows the token and the delimiter.
// NOTE(review): the handling of "delimiter not found" is not visible in this listing.
200 function RemoveToken(var s: string;const delimiter: string): string;
204 p := AnsiPos(delimiter, s);
208 Result := Copy(s, 1, p - 1);
209 s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
212 //==============================
214 //==============================
// Comparison callback passed to TList.Sort by CalcPaulGraham.
// Each pointer is cast to Single (CalcPaulGraham stores the values with Pointer( ... )),
// and the two items are compared by their distance from 0.5.
// NOTE(review): the statements that turn v1/v2 into the result are not visible here,
//               so the resulting sort direction cannot be confirmed from this listing.
215 function AbsSort( p1, p2 : Pointer ) : Integer;
220 v1 := Abs( Single( p1 ) - 0.5 );
221 v2 := Abs( Single( p2 ) - 0.5 );
231 //************************************************************
233 //************************************************************
// Creates the word-count list with Duplicates = dupIgnore and case-sensitive comparison.
234 constructor TWordCount.Create;
237 Duplicates := dupIgnore;
238 CaseSensitive := True;
// Walks the entries from last to first and acts on every non-nil attached object.
// NOTE(review): the statement executed for each object is not visible here - presumably it frees it.
243 destructor TWordCount.Destroy;
248 for i := Count - 1 downto 0 do
249 if Objects[ i ] <> nil then
256 //************************************************************
257 // TGikoBayesian class
258 //************************************************************
260 //==============================
262 //==============================
// Creates the filter's word list with Duplicates = dupIgnore and case-sensitive comparison.
263 constructor TGikoBayesian.Create;
266 Duplicates := dupIgnore;
267 CaseSensitive := True;
272 //==============================
274 //==============================
// Frees every non-nil object attached to the list, iterating from last to first.
// "inherited Objects" is used because this class redeclares Objects as a name-indexed property.
275 destructor TGikoBayesian.Destroy;
280 for i := Count - 1 downto 0 do
281 if inherited Objects[ i ] <> nil then
282 inherited Objects[ i ].Free;
// Loads word statistics from filePath and adds them to this list.
// - filePath is remembered in FFilePath (Save reuses it).
// - FileExists is checked first; what happens for a missing file is not visible in this listing.
// - Parsing starts at index 1, so the first line is not read as a record;
//   SaveToFile writes GIKO_BAYESIAN_FILE_VERSION as that first line.
// - Each record is split on #1 into: name, NormalWord, ImportantWord, NormalText, ImportantText.
//   The counters are parsed with a '$' (hexadecimal) prefix and default to 0 when not parseable.
// NOTE(review): the release of sl is not visible here - confirm it is freed (ideally in try/finally).
288 procedure TGikoBayesian.LoadFromFile( const filePath : string );
297 FFilePath := filePath;
299 if not FileExists( filePath ) then
302 sl := TStringList.Create;
304 sl.LoadFromFile( filePath );
306 for i := 1 to sl.Count - 1 do begin
308 name := RemoveToken( s, #1 );
309 info := TWordInfo.Create;
310 info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
311 info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
312 info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
313 info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
315 AddObject( name, info );
// Writes the learning data to filePath and remembers the path in FFilePath.
// Layout: GIKO_BAYESIAN_FILE_VERSION first, then for every entry a string built from the word
// and its four counters formatted with '%x' (hexadecimal), separated by #1, in the order
// NormalWord, ImportantWord, NormalText, ImportantText.
// NOTE(review): the statement that appends s to sl and the release of sl are not visible in this listing.
323 procedure TGikoBayesian.SaveToFile( const filePath : string );
331 FFilePath := filePath;
333 sl := TStringList.Create;
336 sl.Add( GIKO_BAYESIAN_FILE_VERSION );
338 for i := 0 to Count - 1 do begin
339 info := TWordInfo( inherited Objects[ i ] );
340 s := Strings[ i ] + #1
341 + Format('%x', [info.NormalWord]) + #1
342 + Format('%x', [info.ImportantWord]) + #1
343 + Format('%x', [info.NormalText]) + #1
344 + Format('%x', [info.ImportantText]);
349 sl.SaveToFile( filePath );
// Saves to the path stored in FFilePath (set by LoadFromFile / SaveToFile);
// SaveToFile is only called when FFilePath is not empty.
356 procedure TGikoBayesian.Save;
359 if FFilePath <> '' then
360 SaveToFile( FFilePath );
364 //==============================
366 //==============================
// Getter of the name-indexed Objects property: looks the word up with IndexOf and
// returns the attached object cast to TWordInfo.
// NOTE(review): the branch for a missing name is not visible here; Learn and Forget
//               compare the returned value with nil.
367 function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
372 idx := IndexOf( name ); // extremely slow
376 Result := TWordInfo( inherited Objects[ idx ] );
380 //==============================
382 //==============================
// Setter of the name-indexed Objects property: looks the word up with IndexOf, then either
// adds a new (name, value) entry with AddObject or replaces the object stored at idx.
// NOTE(review): the condition selecting between the two is not visible here - presumably idx < 0.
// NOTE(review): the replaced object is not freed in the visible code - confirm ownership with callers.
383 procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
388 idx := IndexOf( name );
390 AddObject( name, value )
392 inherited Objects[ idx ] := value;
397 //==============================
399 //==============================
// Splits text into words and tallies them into wordCount.
// The text is scanned from p to tail; every character is assigned a mode (character class).
// When the mode changes, or a delimiter string from KAKUJOSI is found, the bytes between
// last and p are taken as one word (unless the finished run was ModeWhite): the word is
// looked up in wordCount, a TWordCountInfo is created and attached when it is new, and its
// WordCount is incremented. The same tally is performed once more after the loop for the
// final run.
// Bytes in kKanji are treated as the first byte of a two-byte character whose 16-bit code
// selects the mode; all other bytes are classified through the CharMode1 table.
// NOTE(review): the two-byte ranges look like Shift_JIS code points - confirm the expected encoding.
400 procedure TGikoBayesian.CountWord(
402 wordCount : TWordCount );
404 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
405 ModeWGraph, ModeWAlpha, ModeWNum,
406 ModeWHira, ModeWKata, ModeWKanji);
408 p, tail, last : PChar;
409 mode, newMode : Modes;
413 delimiter : TStringList;
416 countInfo : TWordCountInfo;
418 KAKUJOSI = '
\82ð' + #10 + '
\82É' + #10 + '
\82ª' + #10 + '
\82Æ' + #10 + '
\82©
\82ç' +
419 #10 + '
\82Å' + #10 + '
\82Ö' + #10 + '
\82æ
\82è' + #10 + '
\82Ü
\82Å';
420 kKanji = [$80..$A0, $E0..$ff];
423 delimiter := TStringList.Create;
426 delimiter.Text := KAKUJOSI;
428 tail := p + Length( text );
431 while p < tail do begin
433 // Determine the character type
434 // Note: punctuation ends up as ModeGraph, so it does not need to be handled separately
435 // if Byte(Byte( p^ ) - $a1) < $5e then begin
436 if Byte( p^ ) in kKanji then begin
437 if p + 1 < tail then begin
438 ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
440 $8140: newMode := ModeWhite;
441 $8141..$824e: newMode := ModeWGraph;
442 $824f..$8258: newMode := ModeWNum;
443 $8260..$829a: newMode := ModeWAlpha;
444 $829f..$82f1: newMode := ModeWHira;
445 $8340..$8396: newMode := ModeWKata;
446 else newMode := ModeWKanji;
448 // The voiced sound mark, semi-voiced sound mark and long-vowel mark ($814A, $814B, $815B) are treated as part of hiragana or katakana
449 if (mode = ModeWHira) or (mode = ModeWKata) then
450 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
453 newMode := ModeWhite;
458 // Check whether a delimiter string is present
459 if p + 3 < tail then begin // 3 = maximum character count of a delimiter - 1
460 for i := 0 to delimiter.Count - 1 do begin
462 p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
464 chSize := Length( delimiter[ i ] );
470 newMode := Modes( CharMode1[ Byte( p^ ) ] );
475 if (mode <> newMode) or delimited then begin
477 // The character type changed
478 // or a delimiter string was encountered
479 if mode <> ModeWhite then begin
480 SetLength( aWord, p - last );
481 CopyMemory( PChar( aWord ), last, p - last );
482 //aWord := Copy( last, 0, p - last );
483 idx := wordCount.IndexOf( aWord ); // slow
484 if idx < 0 then begin
485 countInfo := TWordCountInfo.Create;
486 wordCount.AddObject( aWord, countInfo );
488 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
490 countInfo.WordCount := countInfo.WordCount + 1;
501 if mode <> ModeWhite then begin
502 aWord := Copy( last, 0, p - last );
503 idx := wordCount.IndexOf( aWord );
504 if idx < 0 then begin
505 countInfo := TWordCountInfo.Create;
506 wordCount.AddObject( aWord, countInfo );
508 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
510 countInfo.WordCount := countInfo.WordCount + 1;
518 //==============================
520 //==============================
// Scores the words in wordCount by combining per-word probabilities.
// Nested p(aWord): looks up the word's TWordInfo; in the general case it returns
//   (ImportantWord / ImportantText) / ((NormalWord * 2 / NormalText) + (ImportantWord / ImportantText)).
//   Separate branches exist for NormalWord = 0, ImportantWord = 0 and
//   ImportantWord + NormalWord * 2 < 5 (plus a first branch not visible here, presumably info = nil);
//   the values they return are not visible in this listing.
// Main body: p() of every word is stored in a TList, the list is sorted with AbsSort,
// s is multiplied by each selected value and q by its complement, and the result is s / (s + q).
// At most SAMPLE_COUNT items are used (the constant is defined outside the visible lines).
// NOTE(review): Single values are stored in the TList via Pointer( ... ) and read back via Single( ... );
//               this relies on Single and Pointer having the same size - confirm for the target platform.
// NOTE(review): confirm ImportantText / NormalText cannot be 0 when the general-case division is reached.
521 function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
523 function p( const aWord : string ) : Single;
527 info := Objects[ aWord ];
530 else if info.NormalWord = 0 then
532 else if info.ImportantWord = 0 then
534 else if info.ImportantWord + info.NormalWord * 2 < 5 then
537 Result := ( info.ImportantWord / info.ImportantText ) /
538 ((info.NormalWord * 2 / info.NormalText ) +
539 (info.ImportantWord / info.ImportantText));
551 if wordCount.Count = 0 then
554 narray := TList.Create;
556 for i := 0 to wordCount.Count - 1 do begin
557 narray.Add( Pointer( p( wordCount[ i ] ) ) );
560 narray.Sort( AbsSort );
564 i := min( SAMPLE_COUNT, narray.Count );
568 s := s * Single( narray[ i ] );
569 q := q * (1 - Single( narray[ i ] ));
572 Result := s / (s + q);
579 //==============================
581 //==============================
// Scores the words in wordCount with the calculation below.
// Nested p(aWord): general case is
//   (ImportantWord / ImportantText) / ((NormalWord / NormalText) + (ImportantWord / ImportantText));
//   branches for ImportantWord = 0 and NormalWord = 0 exist, their results are not visible here.
// Nested f(cnt, n, mean): ((k * mean) + (cnt * n)) / (k + cnt); k is defined outside the visible lines.
// Main body: stores p() of each word in narray, divides mean by the word count, then for each word
// computes n := f(WordCount, narray[i], mean) and accumulates
//   P1 += Ln(1 - n) * WordCount, Q1 += Ln(n) * WordCount, cnt += WordCount.
// Finally P1 := 1 - Exp(P1 / cnt), Q1 := 1 - Exp(Q1 / cnt), n := (P1 - Q1) / (P1 + Q1),
// Result := (1 + n) / 2. A separate branch handles P1 + Q1 = 0 (its result is not visible here).
// NOTE(review): countInfo.WordCount is read on line 637 before the "countInfo <> nil" test on
//               line 638, and again on lines 640-641; the nil test does not protect those reads.
582 function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
584 function p( const aWord : string ) : Single;
588 info := Objects[ aWord ];
591 else if info.ImportantWord = 0 then
593 else if info.NormalWord = 0 then
596 Result := ( info.ImportantWord / info.ImportantText ) /
597 ((info.NormalWord / info.NormalText ) +
598 (info.ImportantWord / info.ImportantText));
601 function f( cnt : Integer; n, mean : Single ) : Extended;
605 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
610 narray : array of Single;
612 countInfo : TWordCountInfo;
614 P1, Q1, R1 : Extended;
618 if wordCount.Count = 0 then begin
623 SetLength( narray, wordCount.Count );
625 for i := 0 to wordCount.Count - 1 do begin
626 n := p( wordCount[ i ] );
630 mean := mean / wordCount.Count;
635 for i := 0 to wordCount.Count - 1 do begin
636 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
637 n := f( countInfo.WordCount, narray[ i ], mean );
638 if countInfo <> nil then
639 cnt := cnt + countInfo.WordCount;
640 P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
641 Q1 := Q1 + Ln( n ) * countInfo.WordCount;
645 P1 := 1 - Exp( P1 * (1 / cnt) );
646 Q1 := 1 - Exp( Q1 * (1 / cnt) );
648 if P1 + Q1 = 0 then begin
651 n := (P1 - Q1) / (P1 + Q1);
652 Result := (1 + n) / 2;
657 //==============================
658 // CalcGaryRobinsonFisher
659 //==============================
// Scores the words in wordCount; P1 and Q1 are passed through the nested prbx function.
// Nested p(aWord): general case is
//   ImportantWord / (ImportantWord + NormalWord * ImportantText / NormalText);
//   branches for ImportantWord = 0 and NormalWord = 0 exist, their results are not visible here.
// Nested f(cnt, n, mean): ((k * mean) + (cnt * n)) / (k + cnt); k is defined outside the visible lines.
// Nested prbx(x2, degree): loops while i < degree / 2 - 1, adding ln(m / i) to term and exp(term) to sum;
//   m, the initial values and the returned expression are not visible here.
// Main body: stores p() of each word, divides mean by the word count, computes n := f(...) per word and
// accumulates log terms into P1 / Q1 and the occurrence total into cnt; then P1 and Q1 are passed
// through prbx with 2 * cnt as the degree argument and combined into Result.
// NOTE(review): two variants of the accumulation (lines 754-755 vs 757) and of the prbx calls
//               (lines 764-765 vs 767-768) are visible; the conditions selecting them are not.
// NOTE(review): countInfo.WordCount is read on line 750 before the "countInfo <> nil" test on line 751.
// NOTE(review): Result is (1 + Q1 + P1) / 2; confirm the sign of P1 against the intended formula,
//               since the documented return range is 0.0 ~ 1.0.
660 function TGikoBayesian.CalcGaryRobinsonFisher(
661 wordCount : TWordCount
664 function p( const aWord : string ) : Single;
668 info := Objects[ aWord ];
671 else if info.ImportantWord = 0 then
673 else if info.NormalWord = 0 then
676 Result := info.ImportantWord /
677 (info.ImportantWord + info.NormalWord *
678 info.ImportantText / info.NormalText);
681 function f( cnt : Integer; n, mean : Single ) : Extended;
685 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
688 function prbx( x2, degree : Extended ) : Extended;
701 while i < (degree / 2 - 1) do begin
702 term := term + ln( m / i );
703 sum := sum + exp( term );
716 narray : array of Single;
718 countInfo : TWordCountInfo;
721 important : Extended;
726 if wordCount.Count = 0 then begin
731 SetLength( narray, wordCount.Count );
733 for i := 0 to wordCount.Count - 1 do begin
734 n := p( wordCount[ i ] );
738 mean := mean / wordCount.Count;
748 for i := 0 to wordCount.Count - 1 do begin
749 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
750 n := f( countInfo.WordCount, narray[ i ], mean );
751 if countInfo <> nil then
752 cnt := cnt + countInfo.WordCount;
754 P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
755 Q1 := Q1 + Ln( n ) * countInfo.WordCount;
757 P1 := P1 + Ln( 1 - n );
764 P1 := prbx( -2 * P1, 2 * cnt );
765 Q1 := prbx( -2 * Q1, 2 * cnt );
767 P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
768 Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
770 if P1 + Q1 = 0 then begin
773 Result := (1 + Q1 + P1) / 2;
778 //==============================
780 //==============================
// Counts the words of text into wordCount with CountWord, then returns the score computed by
// the Calc* function that matches algorithm.
781 function TGikoBayesian.Parse(
783 wordCount : TWordCount;
784 algorithm : TGikoBayesianAlgorithm
788 CountWord( text, wordCount );
790 gbaPaulGraham: Result := CalcPaulGraham( wordCount );
791 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
792 gbaGaryRobinsonFisher:
793 Result := CalcGaryRobinsonFisher( wordCount );
799 //==============================
801 //==============================
// Adds the counts in wordCount to the stored statistics.
// For every word: fetches its TWordInfo through the name-indexed Objects property, creating and
// registering a new one when none exists. When isImportant is True, ImportantWord grows by the
// word's occurrence count and ImportantText by 1; the NormalWord / NormalText statements are
// presumably the else branch (the "else" line is not visible in this listing).
// NOTE(review): countinfo is used without a nil test - confirm every wordCount entry carries an object.
802 procedure TGikoBayesian.Learn(
803 wordCount : TWordCount;
804 isImportant : Boolean );
807 wordinfo : TWordInfo;
808 countinfo : TWordCountInfo;
812 for i := 0 to wordCount.Count - 1 do begin
813 aWord := wordCount[ i ];
814 wordinfo := Objects[ aWord ];
815 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
816 if wordinfo = nil then begin
817 wordinfo := TWordInfo.Create;
818 Objects[ aWord ] := wordinfo;
821 if isImportant then begin
822 wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
823 wordinfo.ImportantText := wordinfo.ImportantText + 1;
825 wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
826 wordinfo.NormalText := wordinfo.NormalText + 1;
832 //==============================
834 //==============================
835 procedure TGikoBayesian.Forget(
836 wordCount : TWordCount;
837 isImportant : Boolean );
840 wordinfo : TWordInfo;
841 countinfo : TWordCountInfo;
845 for i := 0 to wordCount.Count - 1 do begin
846 aWord := wordCount[ i ];
847 wordinfo := Objects[ aWord ];
848 if wordinfo = nil then
851 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
852 if isImportant then begin
853 if wordInfo.ImportantText > 0 then begin
854 wordinfo.ImportantText := wordinfo.ImportantText - 1;
855 wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
858 if wordinfo.NormalText > 0 then begin
859 wordinfo.NormalText := wordinfo.NormalText - 1;
860 wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;