5 \brief Bayesian filter
7 $Id: GikoBayesian.pas,v 1.10 2004/10/31 12:24:33 yoffy Exp $
12 //==================================================
14 //==================================================
17 //==================================================
19 //==================================================
21 {!***********************************************************
22 \brief Word properties
23 ************************************************************}
24 TWordInfo = class( TObject )
26 FNormalWord : Integer; //!< Number of times the word occurred as a normal word
27 FImportantWord : Integer; //!< Number of times the word occurred as an important (noteworthy) word
28 FNormalText : Integer; //!< Number of texts that contained the word as a normal word
29 FImportantText : Integer; //!< Number of texts that contained the word as an important (noteworthy) word
32 property NormalWord : Integer read FNormalWord write FNormalWord;
33 property ImportantWord : Integer read FImportantWord write FImportantWord;
34 property NormalText : Integer read FNormalText write FNormalText;
35 property ImportantText : Integer read FImportantText write FImportantText;
38 {!***********************************************************
39 \brief Properties of an analyzed word
40 ************************************************************}
41 TWordCountInfo = class( TObject )
43 FWordCount : Integer; //!< Word count
46 property WordCount : Integer read FWordCount write FWordCount;
49 {!***********************************************************
50 \brief List of analyzed words
51 ************************************************************}
52 // TWordCount = class( THashedStringList ) // extremely slow
53 TWordCount = class( TStringList )
56 destructor Destroy; override;
59 {!***********************************************************
60 \brief Filter algorithm
61 ************************************************************}
62 TGikoBayesianAlgorithm =
63 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
65 {!***********************************************************
66 \brief Bayesian filter
67 ************************************************************}
68 // TGikoBayesian = class( THashedStringList ) // extremely slow
69 TGikoBayesian = class( TStringList )
71 FFilePath : string; //!< Path of the file that was loaded
72 function GetObject( const name : string ) : TWordInfo;
73 procedure SetObject( const name : string; value : TWordInfo );
77 destructor Destroy; override;
79 //! Loads the learning history from a file.
80 procedure LoadFromFile( const filePath : string );
82 //! Saves the learning history to a file.
83 procedure SaveToFile( const filePath : string );
85 //! Saves the learning history to a file.
88 //! Gets the information stored for a word.
89 property Objects[ const name : string ] : TWordInfo
90 read GetObject write SetObject; default;
92 //! Counts the words contained in a text.
95 wordCount : TWordCount );
98 \brief Determines the importance of a text based on the Paul Graham method.
99 \return Importance of the text (not worth attention 0.0 - 1.0 worth attention)
101 function CalcPaulGraham( wordCount : TWordCount ) : Extended;
104 \brief Determines the importance of a text based on the GaryRobinson method.
105 \return Importance of the text (not worth attention 0.0 - 1.0 worth attention)
107 function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
110 \brief Determines the importance of a text based on the GaryRobinson-Fisher method.
111 \return Importance of the text (not worth attention 0.0 - 1.0 worth attention)
113 function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
116 \brief Analyzes a text
117 \param text Text to analyze
118 \param wordCount Receives the list of analyzed words
119 \param algorithm Specifies the algorithm used to determine the importance
120 \return Importance of the text (not worth attention 0.0 - 1.0 worth attention)
122 This simply runs CountWord and Calcxxxxx together.
126 wordCount : TWordCount;
127 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
131 \brief Learns a text
132 \param wordCount Word list analyzed by Parse
133 \param isImportant True to remember the text as one worth attention
136 wordCount : TWordCount;
137 isImportant : Boolean );
140 \brief Forgets a learning result
141 \param wordCount Word list analyzed by Parse
142 \param isImportant True if the text had been remembered as one worth attention
143 \warning There is no way to check whether a text has already been learned.<br>
144 Calling Forget on a text that was never passed to Learn, or on one for which isImportant is wrong,
145 corrupts the database.<br>
146 Keep track of whether a text has been learned on your own.
148 This does not clear all learning results.<br>
149 Only the learning result of the text from which wordCount was obtained (the text argument of Parse) is cleared.<br><br>
151 It is mainly used in the order Forget -> Learn to switch a text between important and non-important.
154 wordCount : TWordCount;
155 isImportant : Boolean );
158 //==================================================
160 //==================================================
163 SysUtils, Math, Windows;
166 GIKO_BAYESIAN_FILE_VERSION = '1.0';
168 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
169 ModeWGraph, ModeWAlpha, ModeWNum,
170 ModeWHira, ModeWKata, ModeWKanji);
// CharMode1: character class for every single-byte value (index = byte value).
// CountWord reads it as Modes( CharMode1[ Byte( p^ ) ] ) for bytes that are not
// in kKanji. Classes assigned by the table:
//   0 : $00..$20, $7F, and $80..$A0 / $E0..$FF
//   1 : ASCII punctuation and symbols, and $A1..$A4
//   2 : ASCII digits '0'..'9'
//   3 : ASCII letters 'A'..'Z' and 'a'..'z'
//   4 : $A5..$DF
// Fix: entry $39 ('9') used to be 1, so '9' was classified with the punctuation
// and split off from the digits $30..$38; it is now 2 like the other digits.
// NOTE(review): the numeric classes do not match the identifier names of Modes
// (ordinal 2 is ModeAlpha, 3 is ModeHanKana, 4 is ModeNum). The visible code only
// compares single-byte classes for (in)equality and against ModeWhite, so this
// looks harmless - confirm before relying on the names.
172 CharMode1 : array [ 0..255 ] of Byte =
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
178 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
179 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
180 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
181 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185 0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
186 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
187 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
188 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
189 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
193 //************************************************************
195 //************************************************************
197 //==============================
199 //==============================
// RemoveToken: splits the leading token off s at the first occurrence of delimiter.
// The text before the delimiter is returned, and s is reduced to the text that
// follows the delimiter.
// NOTE(review): source lines 205-207 are not visible in this listing; presumably
// they handle the case where AnsiPos returns 0 (delimiter not found) - confirm.
200 function RemoveToken(var s: string;const delimiter: string): string;
204 p := AnsiPos(delimiter, s);
208 Result := Copy(s, 1, p - 1);
209 s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
212 //==============================
214 //==============================
// AbsSort: comparison callback passed to TList.Sort by CalcPaulGraham.
// Each Pointer argument is reinterpreted as a Single through a typecast, and the
// values compared are the distances of those Singles from 0.5.
// NOTE(review): the lines that turn v1/v2 into the Integer result are not visible
// here, so the sort direction cannot be confirmed from this listing.
// NOTE(review): the Pointer -> Single typecast presumes both types have the same
// size - confirm before building for a target with wider pointers.
215 function AbsSort( p1, p2 : Pointer ) : Integer;
220 v1 := Abs( Single( p1 ) - 0.5 );
221 v2 := Abs( Single( p2 ) - 0.5 );
231 //************************************************************
233 //************************************************************
// Constructor: configures the list to ignore duplicate strings (dupIgnore) and
// to compare strings case-sensitively.
// NOTE(review): Duplicates is documented to have an effect only on sorted lists;
// whether Sorted is set here is on a line not visible in this listing - confirm.
234 constructor TWordCount.Create;
237 Duplicates := dupIgnore;
238 CaseSensitive := True;
// Destructor: walks the list from the last index down to 0 and acts on every
// entry whose attached object is not nil.
// NOTE(review): the statement executed for each non-nil object (source line 250)
// is not visible here; presumably it frees the object - confirm.
243 destructor TWordCount.Destroy;
248 for i := Count - 1 downto 0 do
249 if Objects[ i ] <> nil then
256 //************************************************************
257 // TGikoBayesian class
258 //************************************************************
260 //==============================
262 //==============================
// Constructor: configures the list to ignore duplicate strings (dupIgnore) and
// to compare strings case-sensitively.
// NOTE(review): Duplicates is documented to have an effect only on sorted lists;
// whether Sorted is set here is on a line not visible in this listing - confirm.
263 constructor TGikoBayesian.Create;
266 Duplicates := dupIgnore;
267 CaseSensitive := True;
272 //==============================
274 //==============================
// Destructor: frees every non-nil object attached to the list, iterating from
// the last index down to 0.
// "inherited Objects" selects the index-based property of the ancestor, because
// this class redeclares Objects as a property keyed by word (name : string).
275 destructor TGikoBayesian.Destroy;
280 for i := Count - 1 downto 0 do
281 if inherited Objects[ i ] <> nil then
282 inherited Objects[ i ].Free;
// LoadFromFile: reads the learning history from filePath.
// The path is remembered in FFilePath. The file is read into a temporary
// TStringList and entries are processed starting at index 1; index 0 is skipped
// (SaveToFile writes GIKO_BAYESIAN_FILE_VERSION there). Each entry is split on #1
// into the word followed by four counters in the order NormalWord, ImportantWord,
// NormalText, ImportantText. The counters are parsed as hexadecimal (the '$'
// prefix) and default to 0 when they cannot be parsed. Every word is added to the
// list together with its freshly created TWordInfo.
// NOTE(review): the behaviour when the file does not exist (source lines 300-301),
// the assignment of s (line 307) and the release of sl are on lines not visible
// in this listing.
// NOTE(review): if AddObject ignores a duplicate name, the TWordInfo created for
// it would not be owned by the list - confirm whether that can happen.
288 procedure TGikoBayesian.LoadFromFile( const filePath : string );
297 FFilePath := filePath;
299 if not FileExists( filePath ) then
302 sl := TStringList.Create;
304 sl.LoadFromFile( filePath );
306 for i := 1 to sl.Count - 1 do begin
308 name := RemoveToken( s, #1 );
309 info := TWordInfo.Create;
310 info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
311 info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
312 info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
313 info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
315 AddObject( name, info );
// SaveToFile: writes the learning history to filePath and remembers the path in
// FFilePath.
// The first line written is GIKO_BAYESIAN_FILE_VERSION. For every entry a line is
// built from the word and its four counters formatted as hexadecimal ('%x'),
// separated by #1, in the order NormalWord, ImportantWord, NormalText,
// ImportantText - the same layout LoadFromFile parses.
// NOTE(review): the statement that appends s to sl and the release of sl are on
// lines not visible in this listing.
323 procedure TGikoBayesian.SaveToFile( const filePath : string );
331 FFilePath := filePath;
333 sl := TStringList.Create;
336 sl.Add( GIKO_BAYESIAN_FILE_VERSION );
338 for i := 0 to Count - 1 do begin
339 info := TWordInfo( inherited Objects[ i ] );
340 s := Strings[ i ] + #1
341 + Format('%x', [info.NormalWord]) + #1
342 + Format('%x', [info.ImportantWord]) + #1
343 + Format('%x', [info.NormalText]) + #1
344 + Format('%x', [info.ImportantText]);
349 sl.SaveToFile( filePath );
// Save: writes the learning history back to the path stored in FFilePath (set by
// LoadFromFile and SaveToFile). Does nothing while FFilePath is empty.
356 procedure TGikoBayesian.Save;
359 if FFilePath <> '' then
360 SaveToFile( FFilePath );
364 //==============================
366 //==============================
// GetObject: getter of the default property Objects[ name ]. Looks the word up
// with IndexOf and returns the TWordInfo attached to that entry.
// NOTE(review): the handling of a failed lookup (idx < 0) is on lines not visible
// here; callers in this file compare the result with nil, so presumably nil is
// returned in that case - confirm.
367 function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
372 idx := IndexOf( name ); // extremely slow
376 Result := TWordInfo( inherited Objects[ idx ] );
380 //==============================
382 //==============================
// SetObject: setter of the default property Objects[ name ]. Looks the word up
// with IndexOf, then either adds a new word/value entry (AddObject) or replaces
// the object stored at the index that was found.
// NOTE(review): the condition choosing between the two branches (source lines
// 389/391) is not visible; presumably "idx < 0" selects AddObject - confirm.
// NOTE(review): the visible code does not free the object previously stored at
// idx when it is replaced.
383 procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
388 idx := IndexOf( name );
390 AddObject( name, value )
392 inherited Objects[ idx ] := value;
397 //==============================
399 //==============================
// CountWord: splits a text into words and accumulates their occurrence counts in
// wordCount.
// The text is scanned from p up to tail. A byte contained in kKanji is treated
// as the first byte of a double-byte character and classified by the 16-bit
// value ch; any other byte is classified through the CharMode1 table. When the
// character class changes, or a delimiter string from KAKUJOSI is found at p,
// and the run that just ended is not ModeWhite, the bytes between last and p
// become a word: it is looked up in wordCount, added with a new TWordCountInfo
// when absent, and its WordCount is incremented.
// NOTE(review): a second copy of the same bookkeeping follows at source lines
// 501-510, presumably for the last word once the loop has ended - confirm.
// NOTE(review): many lines of this routine (the "text" parameter, the var/const
// keywords, the comparison call on line 461, the code advancing p and last, and
// the closing of blocks) are not visible in this listing.
// NOTE(review): the double-byte ranges tested below are consistent with
// Shift-JIS encoded text - confirm the encoding callers pass in.
400 procedure TGikoBayesian.CountWord(
402 wordCount : TWordCount );
404 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
405 ModeWGraph, ModeWAlpha, ModeWNum,
406 ModeWHira, ModeWKata, ModeWKanji);
408 p, tail, last : PChar;
409 mode, newMode : Modes;
413 delimiter : TStringList;
416 countInfo : TWordCountInfo;
// KAKUJOSI: #10-separated list of delimiter strings, loaded into "delimiter"
// below. This is runtime data; its bytes must stay exactly as they are.
418 KAKUJOSI = '
\82ð' + #10 + '
\82É' + #10 + '
\82ª' + #10 + '
\82Æ' + #10 + '
\82©
\82ç' +
419 #10 + '
\82Å' + #10 + '
\82Ö' + #10 + '
\82æ
\82è' + #10 + '
\82Ü
\82Å';
// kKanji: byte values treated as the first byte of a double-byte character.
420 kKanji = [$80..$A0, $E0..$ff];
423 delimiter := TStringList.Create;
426 delimiter.Text := KAKUJOSI;
428 tail := p + Length( text );
431 while p < tail do begin
433 // Determine the character type
434 // Note: punctuation marks become ModeGraph, so they need no individual handling
435 // if Byte(Byte( p^ ) - $a1) < $5e then begin
436 if Byte( p^ ) in kKanji then begin
437 if p + 1 < tail then begin
438 ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
440 $8140: newMode := ModeWhite;
441 $8141..$824e: newMode := ModeWGraph;
442 $824f..$8258: newMode := ModeWNum;
443 $8260..$829a: newMode := ModeWAlpha;
444 $829f..$82f1: newMode := ModeWHira;
445 $8340..$8396: newMode := ModeWKata;
446 else newMode := ModeWKanji;
448 // The voiced mark, semi-voiced mark and long-vowel mark ($814a, $814b, $815b) count as part of hiragana or katakana
449 if (mode = ModeWHira) or (mode = ModeWKata) then
450 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
453 newMode := ModeWhite;
458 // Check whether a delimiter string starts here
459 if p + 3 < tail then begin // 3 = maximum delimiter length - 1
460 for i := 0 to delimiter.Count - 1 do begin
462 p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
464 chSize := Length( delimiter[ i ] );
470 newMode := Modes( CharMode1[ Byte( p^ ) ] );
475 if (mode <> newMode) or delimited then begin
477 // The character type changed,
478 // or a delimiter string was encountered
479 if mode <> ModeWhite then begin
480 SetLength( aWord, p - last );
481 CopyMemory( PChar( aWord ), last, p - last );
482 //aWord := Copy( last, 0, p - last );
483 idx := wordCount.IndexOf( aWord ); // slow
484 if idx < 0 then begin
485 countInfo := TWordCountInfo.Create;
486 wordCount.AddObject( aWord, countInfo );
488 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
490 countInfo.WordCount := countInfo.WordCount + 1;
501 if mode <> ModeWhite then begin
502 aWord := Copy( last, 0, p - last );
503 idx := wordCount.IndexOf( aWord );
504 if idx < 0 then begin
505 countInfo := TWordCountInfo.Create;
506 wordCount.AddObject( aWord, countInfo );
508 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
510 countInfo.WordCount := countInfo.WordCount + 1;
518 //==============================
520 //==============================
// CalcPaulGraham: rates the text represented by wordCount with the Paul Graham
// method.
// The nested function p() derives a per-word probability from the stored
// TWordInfo. The general case is
//   (ImportantWord / ImportantText) /
//   (NormalWord * 2 / NormalText + ImportantWord / ImportantText)
// with special cases for NormalWord = 0, ImportantWord = 0 and
// ImportantWord + NormalWord * 2 < 5, plus one earlier case whose condition is
// not visible. The values returned by the special cases are on lines not shown.
// The probabilities are stored in a TList by casting each Single to a Pointer
// and sorted with AbsSort. Starting from i = min( SAMPLE_COUNT, narray.Count ),
// they are combined as s = product of p and q = product of (1 - p), and the
// result is s / (s + q).
// NOTE(review): the value returned for an empty wordCount, the loop around
// lines 568-569 and the release of narray are on lines not visible here.
// NOTE(review): the Single <-> Pointer typecasts presume both types have the
// same size - confirm before building for a target with wider pointers.
521 function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
523 function p( const aWord : string ) : Single;
527 info := Objects[ aWord ];
530 else if info.NormalWord = 0 then
532 else if info.ImportantWord = 0 then
534 else if info.ImportantWord + info.NormalWord * 2 < 5 then
537 Result := ( info.ImportantWord / info.ImportantText ) /
538 ((info.NormalWord * 2 / info.NormalText ) +
539 (info.ImportantWord / info.ImportantText));
551 if wordCount.Count = 0 then
554 narray := TList.Create;
556 for i := 0 to wordCount.Count - 1 do begin
557 narray.Add( Pointer( p( wordCount[ i ] ) ) );
560 narray.Sort( AbsSort );
564 i := min( SAMPLE_COUNT, narray.Count );
568 s := s * Single( narray[ i ] );
569 q := q * (1 - Single( narray[ i ] ));
572 Result := s / (s + q);
579 //==============================
581 //==============================
// CalcGaryRobinson: rates the text represented by wordCount with the Gary
// Robinson method.
// p() derives a per-word probability from the stored TWordInfo. The general
// case is
//   (ImportantWord / ImportantText) /
//   (NormalWord / NormalText + ImportantWord / ImportantText)
// with special cases for ImportantWord = 0 and NormalWord = 0, plus one earlier
// case whose condition is not visible. Their return values are on lines not
// shown.
// f() blends a word's probability n with the mean of all probabilities:
//   (k * mean + cnt * n) / (k + cnt); the value of k is on a line not shown.
// The main body stores p() for every word, averages those values into mean,
// then folds f() of every word into P1 as a product of (1 - n). After the loop
//   P1 := 1 - P1 ^ (1 / cnt),  Q1 := 1 - Q1 ^ (1 / cnt)   (cnt = wordCount.Count)
// and, unless P1 + Q1 = 0, Result := (1 + (P1 - Q1) / (P1 + Q1)) / 2.
// NOTE(review): the accumulation of Q1 and the results returned for an empty
// wordCount and for P1 + Q1 = 0 are on lines not visible in this listing.
582 function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
584 function p( const aWord : string ) : Single;
588 info := Objects[ aWord ];
591 else if info.ImportantWord = 0 then
593 else if info.NormalWord = 0 then
596 Result := ( info.ImportantWord / info.ImportantText ) /
597 ((info.NormalWord / info.NormalText ) +
598 (info.ImportantWord / info.ImportantText));
601 function f( cnt : Integer; n, mean : Single ) : Extended;
605 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
610 narray : array of Single;
612 countInfo : TWordCountInfo;
614 P1, Q1, R1 : Extended;
618 if wordCount.Count = 0 then begin
623 SetLength( narray, wordCount.Count );
625 for i := 0 to wordCount.Count - 1 do begin
626 n := p( wordCount[ i ] );
630 mean := mean / wordCount.Count;
634 for i := 0 to wordCount.Count - 1 do begin
635 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
636 n := f( countInfo.WordCount, narray[ i ], mean );
637 P1 := P1 * ( 1 - n );
640 cnt := wordCount.Count;
644 P1 := 1 - Power( P1, 1 / cnt );
645 Q1 := 1 - Power( Q1, 1 / cnt );
647 if P1 + Q1 = 0 then begin
650 n := (P1 - Q1) / (P1 + Q1);
651 Result := (1 + n) / 2;
656 //==============================
657 // CalcGaryRobinsonFisher
658 //==============================
// CalcGaryRobinsonFisher: rates the text represented by wordCount with the
// Gary Robinson / Fisher method.
// p() derives a per-word probability from the stored TWordInfo. The general
// case is
//   ImportantWord / (ImportantWord + NormalWord * ImportantText / NormalText)
// with special cases for ImportantWord = 0 and NormalWord = 0, plus one earlier
// case whose condition is not visible. Their return values are on lines not
// shown.
// f() blends a word's probability n with the mean of all probabilities:
//   (k * mean + cnt * n) / (k + cnt); the value of k is on a line not shown.
// prbx( x2, degree ) accumulates a series, term := term + ln( m / i ) and
// sum := sum + exp( term ), while i < degree / 2 - 1. Its initialisation and
// return expression are on lines not shown; presumably it is a chi-square
// probability - confirm.
// The main body stores p() for every word, averages those values into mean,
// accumulates logarithms of (1 - n) and n into P1 and Q1, passes them through
// prbx with 2 * cnt degrees and combines the two results into Result.
// NOTE(review): two variants of the accumulation (lines 753-754 vs. 756) and of
// the prbx calls (lines 763-764 vs. 766-767) are visible; the condition that
// selects between them is on lines not shown.
659 function TGikoBayesian.CalcGaryRobinsonFisher(
660 wordCount : TWordCount
663 function p( const aWord : string ) : Single;
667 info := Objects[ aWord ];
670 else if info.ImportantWord = 0 then
672 else if info.NormalWord = 0 then
675 Result := info.ImportantWord /
676 (info.ImportantWord + info.NormalWord *
677 info.ImportantText / info.NormalText);
680 function f( cnt : Integer; n, mean : Single ) : Extended;
684 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
687 function prbx( x2, degree : Extended ) : Extended;
700 while i < (degree / 2 - 1) do begin
701 term := term + ln( m / i );
702 sum := sum + exp( term );
715 narray : array of Single;
717 countInfo : TWordCountInfo;
720 important : Extended;
725 if wordCount.Count = 0 then begin
730 SetLength( narray, wordCount.Count );
732 for i := 0 to wordCount.Count - 1 do begin
733 n := p( wordCount[ i ] );
737 mean := mean / wordCount.Count;
747 for i := 0 to wordCount.Count - 1 do begin
748 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
// NOTE(review): countInfo is dereferenced on the next line before the nil test
// on line 750, so that test cannot protect against a nil object.
749 n := f( countInfo.WordCount, narray[ i ], mean );
750 if countInfo <> nil then
751 cnt := cnt + countInfo.WordCount;
753 P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
754 Q1 := Q1 + Ln( n ) * countInfo.WordCount;
756 P1 := P1 + Ln( 1 - n );
763 P1 := prbx( -2 * P1, 2 * cnt );
764 Q1 := prbx( -2 * Q1, 2 * cnt );
766 P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
767 Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
769 if P1 + Q1 = 0 then begin
// NOTE(review): if prbx returns values in 0..1, (1 + Q1 + P1) / 2 lies in
// 0.5..1.5, whereas the declaration's doc comment gives the result range as
// 0.0 to 1.0. Fisher-style combining normally uses a difference of the two
// terms - confirm the intended sign against prbx.
772 Result := (1 + Q1 + P1) / 2;
777 //==============================
779 //==============================
// Parse: counts the words of text into wordCount (CountWord), then returns the
// rating computed by the routine matching algorithm: CalcPaulGraham,
// CalcGaryRobinson or CalcGaryRobinsonFisher.
// NOTE(review): the "text" parameter line, the case header and any else branch
// are on lines not visible in this listing.
780 function TGikoBayesian.Parse(
782 wordCount : TWordCount;
783 algorithm : TGikoBayesianAlgorithm
787 CountWord( text, wordCount );
789 gbaPaulGraham: Result := CalcPaulGraham( wordCount );
790 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
791 gbaGaryRobinsonFisher:
792 Result := CalcGaryRobinsonFisher( wordCount );
798 //==============================
800 //==============================
// Learn: merges the word counts of one text into the learning data.
// For every word in wordCount the stored TWordInfo is fetched, or created and
// registered through Objects[ aWord ] when the word is unknown. When
// isImportant is True, ImportantWord grows by the word's count in this text and
// ImportantText by 1. The NormalWord / NormalText updates on lines 824-825 are
// presumably the else branch; the branch keyword is on a line not shown.
801 procedure TGikoBayesian.Learn(
802 wordCount : TWordCount;
803 isImportant : Boolean );
806 wordinfo : TWordInfo;
807 countinfo : TWordCountInfo;
811 for i := 0 to wordCount.Count - 1 do begin
812 aWord := wordCount[ i ];
813 wordinfo := Objects[ aWord ];
814 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
815 if wordinfo = nil then begin
816 wordinfo := TWordInfo.Create;
817 Objects[ aWord ] := wordinfo;
820 if isImportant then begin
821 wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
822 wordinfo.ImportantText := wordinfo.ImportantText + 1;
824 wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
825 wordinfo.NormalText := wordinfo.NormalText + 1;
831 //==============================
833 //==============================
// Forget: removes the word counts of one text from the learning data (the
// inverse of Learn).
// For every word in wordCount the stored TWordInfo is fetched. When isImportant
// is True and ImportantText > 0, ImportantText is decremented and ImportantWord
// is reduced by the word's count in this text. The NormalText / NormalWord
// updates on lines 857-859 are presumably the else branch; the branch keyword
// is on a line not shown.
// NOTE(review): the action taken for an unknown word (wordinfo = nil, source
// lines 848-849) is not visible; presumably the word is skipped - confirm.
// NOTE(review): only the *Text counters are guarded against going below zero.
// ImportantWord / NormalWord can become negative when the text being forgotten
// does not match what was learned.
834 procedure TGikoBayesian.Forget(
835 wordCount : TWordCount;
836 isImportant : Boolean );
839 wordinfo : TWordInfo;
840 countinfo : TWordCountInfo;
844 for i := 0 to wordCount.Count - 1 do begin
845 aWord := wordCount[ i ];
846 wordinfo := Objects[ aWord ];
847 if wordinfo = nil then
850 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
851 if isImportant then begin
852 if wordInfo.ImportantText > 0 then begin
853 wordinfo.ImportantText := wordinfo.ImportantText - 1;
854 wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
857 if wordinfo.NormalText > 0 then begin
858 wordinfo.NormalText := wordinfo.NormalText - 1;
859 wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;