OSDN Git Service

・スレタイの特定ワードを非表示にする機能に「©bbspink.com」も追加
[gikonavigoeson/gikonavi.git] / GikoBayesian.pas
index 8291c82..55791a3 100644 (file)
@@ -4,15 +4,19 @@ unit GikoBayesian;
 \file          GikoBayesian.pas
 \brief Bayesian filter
 
-$Id: GikoBayesian.pas,v 1.2 2004/10/21 01:20:34 yoffy Exp $
+
+$Id: GikoBayesian.pas,v 1.22 2009/01/31 15:47:15 h677 Exp $
 }
 
+//! Do not include hiragana words in the dictionary
+{$DEFINE GIKO_BAYESIAN_NO_HIRAGANA_DIC}
+
 interface
 
 //==================================================
 uses
 //==================================================
-       Classes, IniFiles;
+       Classes;
 
 //==================================================
 type
@@ -50,8 +54,9 @@ type
        \brief \89ð\90Í\8dÏ\82Ý\92P\8cê\83\8a\83X\83g
        ************************************************************}
 //     TWordCount      = class( THashedStringList )    // \8c\83\92x
-       TWordCount      = class( TStringList )  // \92x
+       TWordCount      = class( TStringList )
        public
+               constructor Create;
                destructor Destroy; override;
        end;
 
@@ -59,12 +64,13 @@ type
        \brief \83t\83B\83\8b\83^\83A\83\8b\83S\83\8a\83Y\83\80
        ************************************************************}
        TGikoBayesianAlgorithm =
-               (gbaPaulGraham, gbaGaryRonbinson{, gbaGaryRonbinsonFisher});
+               (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
 
        {!***********************************************************
        \brief \83x\83C\83W\83A\83\93\83t\83B\83\8b\83^
        ************************************************************}
-       TGikoBayesian = class( THashedStringList )
+//     TGikoBayesian = class( THashedStringList )      // \8c\83\92x
+       TGikoBayesian = class( TStringList )
        private
                FFilePath       : string;       //!< \93Ç\82Ý\8d\9e\82ñ\82¾\83t\83@\83C\83\8b\83p\83X
                function GetObject( const name : string ) : TWordInfo;
@@ -104,7 +110,11 @@ type
                }
                function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
 
-//             function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
+               {!
+               \brief  GaryRobinson-Fisher \96@\82É\8aî\82Ã\82¢\82Ä\95\8fÍ\82Ì\92\8d\96Ú\93x\82ð\8c\88\92è\82µ\82Ü\82·
+               \return \95\8fÍ\82Ì\92\8d\96Ú\93x (\92\8d\96Ú\82É\92l\82µ\82È\82¢ 0.0\81`1.0 \92\8d\96Ú\82·\82×\82«)
+               }
+               function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
 
                {!
                \brief  \95\8fÍ\82ð\89ð\90Í
@@ -118,7 +128,7 @@ type
                function Parse(
                        const text                              : string;
                        wordCount                                       : TWordCount;
-                       algorithm                                       : TGikoBayesianAlgorithm = gbaGaryRonbinson
+                       algorithm                                       : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
                ) : Extended;
 
                {!
@@ -154,20 +164,36 @@ implementation
 //==================================================
 
 uses
-       SysUtils, Math
-{$IFDEF BENCHMARK}
-       , Windows, Dialogs
-{$ENDIF}
-       ;
-
-{$IFDEF BENCHMARK}
-var
-       b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 : Int64; // benchmark
-{$ENDIF}
+       SysUtils, Math, Windows,
+       MojuUtils;
 
 const
        GIKO_BAYESIAN_FILE_VERSION      = '1.0';
-       kYofKanji : TSysCharSet                 = [#$80..#$A0, #$E0..#$ff];
+{
+       Modes                           = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
+                                                               ModeWGraph, ModeWAlpha, ModeWNum,
+                                                               ModeWHira, ModeWKata, ModeWKanji);
+}
+       CharMode1 : array [ 0..255 ] of Byte =
+       (
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+               2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+               1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+               3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
+               1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+               3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
+
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+               4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+               4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+               4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       );
 
 //************************************************************
 // misc
@@ -210,6 +236,15 @@ end;
 //************************************************************
 // TWordCount class
 //************************************************************
+{!
+\brief  Creates an empty, sorted, case-sensitive word list.
+
+The list is kept sorted because lookups on it in this unit go through
+TStringList.Find, which only works on sorted lists. dupIgnore makes Add
+silently skip a word that is already present.
+}
+constructor TWordCount.Create;
+begin
+
+               // Run the ancestor constructor first so that any state it sets up
+               // is initialised before the properties below are touched.
+               inherited Create;
+
+               Duplicates              := dupIgnore;
+               CaseSensitive   := True;
+               Sorted                          := True;
+
+end;
+
 destructor TWordCount.Destroy;
 var
        i : Integer;
@@ -233,12 +268,9 @@ end;
 constructor TGikoBayesian.Create;
 begin
 
-{$IFDEF BENCHMARK}
-       b1:=0; b2:=0; b3:=0; b4:=0; b5:=0; b6:=0; b7:=0; b8:=0; b9:=0; b10:=0;
-{$ENDIF}
-
-       Duplicates := dupIgnore;
-       Sorted := True;
+       Duplicates              := dupIgnore;
+       CaseSensitive   := True;
+       Sorted                          := True;
 
 end;
 
@@ -278,12 +310,12 @@ begin
 
                for i := 1 to sl.Count - 1 do begin
                        s := sl[ i ];
-                       name := RemoveToken( s, #1 );
+                       name := GikoBayesian.RemoveToken( s, #1 );
                        info := TWordInfo.Create;
-                       info.NormalWord                 := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
-                       info.ImportantWord      := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
-                       info.NormalText                 := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
-                       info.ImportantText      := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
+                       info.NormalWord                 := StrToIntDef( '$' + GikoBayesian.RemoveToken( s, #1 ), 0 );
+                       info.ImportantWord      := StrToIntDef( '$' + GikoBayesian.RemoveToken( s, #1 ), 0 );
+                       info.NormalText                 := StrToIntDef( '$' + GikoBayesian.RemoveToken( s, #1 ), 0 );
+                       info.ImportantText      := StrToIntDef( '$' + GikoBayesian.RemoveToken( s, #1 ), 0 );
 
                        AddObject( name, info );
                end;
@@ -301,11 +333,6 @@ var
        info    : TWordInfo;
 begin
 
-{$IFDEF BENCHMARK}
-       ShowMessage(IntToStr(b1)+'/'+IntToStr(b2)+'/'+IntToStr(b3)+'/'+IntToStr(b4)+
-               '/'+IntToStr(b5)+'/'+IntToStr(b6));
-{$ENDIF}
-
        FFilePath := filePath;
 
        sl := TStringList.Create;
@@ -347,11 +374,10 @@ var
        idx : Integer;
 begin
 
-       idx := IndexOf( name );
-       if idx < 0 then
-               Result := nil
+       if Find( name, idx ) then
+               Result := TWordInfo( inherited Objects[ idx ] )
        else
-               Result := TWordInfo( inherited Objects[ idx ] );
+               Result := nil;
 
 end;
 
@@ -363,11 +389,10 @@ var
        idx : Integer;
 begin
 
-       idx := IndexOf( name );
-       if idx < 0 then
-               AddObject( name, value )
+       if Find( name, idx ) then
+               inherited Objects[ idx ] := value
        else
-               inherited Objects[ idx ] := value;
+               AddObject( name, value );
 
 end;
 
@@ -383,51 +408,213 @@ type
                                                                ModeWGraph, ModeWAlpha, ModeWNum,
                                                                ModeWHira, ModeWKata, ModeWKanji);
 var
-       p, tail, last   : PChar;
-       mode, newMode   : Modes;
-       aWord                                   : string;
-       ch                                              : Longword;
-       chSize                          : Integer;
-       delimiter                       : TStringList;
-       delimited                       : Boolean;
-       i, idx                          : Integer;
-       countInfo                       : TWordCountInfo;
-{$IFDEF BENCHMARK}
-       t1, t2                          : Int64;
+       p, tail, last                   : PChar;
+       mode, newMode                   : Modes;
+       ch                                                              : Longword;
+       chSize                                          : Integer;
+       wHiraDelimiter          : TStringList;
+       wHiraFinalDelimiter     : TStringList;
+       wKanjiDelimiter         : TStringList;
+       words                                                   : TStringList;
+       aWord                                                   : string;
+//     countInfo                                       : TWordCountInfo;
+
+       // Surrounds every occurrence of each delimiter in _aWord with #10 on
+       // both sides and returns the resulting text.
+       function cutBoth( _aWord : string; _delim : TStringList ) : string;
+       var
+               _k                      : Integer;
+               _token          : string;
+       begin
+               _k := 0;
+               while _k < _delim.Count do begin
+                       _token := _delim[ _k ];
+                       _aWord := CustomStringReplace(
+                               _aWord, _token, #10 + _token + #10, False );
+                       Inc( _k );
+               end;
+               Result := _aWord;
+       end;
+
+       // Inserts #10 in front of every occurrence of each delimiter in _aWord
+       // and returns the resulting text.
+       function cutFirst( _aWord : string; _delim : TStringList ) : string;
+       var
+               _k                      : Integer;
+               _token          : string;
+       begin
+               _k := 0;
+               while _k < _delim.Count do begin
+                       _token := _delim[ _k ];
+                       _aWord := CustomStringReplace(
+                               _aWord, _token, #10 + _token, False );
+                       Inc( _k );
+               end;
+               Result := _aWord;
+       end;
+
+       // Appends #10 after every occurrence of each delimiter in _aWord and
+       // returns the resulting text.
+       function cutFinal( _aWord : string; _delim : TStringList ) : string;
+       var
+               _k                      : Integer;
+               _token          : string;
+       begin
+               _k := 0;
+               while _k < _delim.Count do begin
+                       _token := _delim[ _k ];
+                       _aWord := CustomStringReplace(
+                               _aWord, _token, _token + #10, False );
+                       Inc( _k );
+               end;
+               Result := _aWord;
+       end;
+
+       // Registers every non-empty entry of _words in _dst: a word that is not
+       // yet present gets a fresh TWordCountInfo, and in either case the word's
+       // WordCount is increased by one.
+       procedure addWord( _dst : TWordCount; _words : TStringList );
+       var
+               _n, _pos        : Integer;
+               _entry          : string;
+               _info           : TWordCountInfo;
+       begin
+               for _n := 0 to _words.Count - 1 do begin
+                       _entry := _words[ _n ];
+                       // Empty entries are not words; skip them.
+                       if Length( _entry ) = 0 then
+                               Continue;
+
+                       if not _dst.Find( _entry, _pos ) then begin
+                               _info := TWordCountInfo.Create;
+                               _dst.AddObject( _entry, _info );
+                       end else
+                               _info := TWordCountInfo( _dst.Objects[ _pos ] );
+
+                       _info.WordCount := _info.WordCount + 1;
+               end;
+       end;
+
+       // Converts one run of same-type characters (_aWord, whose character
+       // class is _mode) into #10-separated words and returns that text.
+       //  - With GIKO_BAYESIAN_NO_HIRAGANA_DIC defined, a hiragana run yields ''.
+       //  - For classes from ModeWGraph upward, spaces are removed first; then
+       //    hiragana runs are split at the hiragana delimiters, and kanji runs
+       //    are split at the kanji delimiters and further into 4-byte pieces.
+       //  - Every other run is returned unchanged.
+       // The decision is based on the _mode parameter, not on the enclosing
+       // routine's current mode variable.
+       function changeMode( _aWord : string; _mode : Modes ) : string;
+       var
+               _i                                      : Integer;
+               _aWord2                         : string;
+               _pWord, _pWord2         : PChar;
+               _pWordTail, _pFound     : PChar;
+       const
+               _delim : string = #10;
+       begin
+{$IFDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
+               // Hiragana runs are not registered at all in this configuration.
+               if _mode = ModeWHira then begin
+                       Result := '';
+                       Exit;
+               end;
 {$ENDIF}
+               if Ord( _mode ) >= Ord( ModeWGraph ) then begin
+                       // Japanese (double-byte) text
+                       // Squeeze out the spaces
+                       _aWord := CustomStringReplace( _aWord, ' ', '', False );
+                       _aWord := CustomStringReplace( _aWord, '\81@', '', False );
+
+                       // Split into words at the delimiters
+                       case _mode of
+                       ModeWHira:
+                               begin
+                                       _aWord := cutFinal( _aWord, wHiraFinalDelimiter );
+                                       Result := cutBoth( _aWord, wHiraDelimiter );
+                               end;
+
+                       ModeWKanji:
+                               begin
+                                       // Split into words at the delimiters
+                                       _aWord := cutBoth( _aWord, wKanjiDelimiter );
+                                       // Then split every 4 bytes (2 characters each)
+                                       _pWord := PChar( _aWord );
+                                       _i := Length( _aWord );
+                                       _pWordTail := _pWord + _i;
+                                       // Room for one extra #10 per 4 source bytes
+                                       SetLength( _aWord2, _i + (_i shr 2) );
+                                       _pWord2 := PChar( _aWord2 );
+
+                                       while _pWord < _pWordTail do begin
+                                               // Find the end of the current #10-delimited segment
+                                               _pFound := AnsiStrPos( _pWord, PChar( _delim ) );
+                                               if _pFound = nil then
+                                                       _pFound := _pWordTail;
+                                               // Last position from which a full 4-byte piece still fits
+                                               _pFound := _pFound - 3;
+
+                                               while _pWord <= _pFound do begin
+                                                       CopyMemory( _pWord2, _pWord, 4 ); _pWord2[ 4 ] := #10;
+                                                       _pWord2 := _pWord2 + 5; _pWord := _pWord + 4;
+                                               end;
+                                               // Copy the remainder of the segment with its terminator
+                                               _i := _pFound + 4 - _pWord; // 4 = 3 + #10
+                                               CopyMemory( _pWord2, _pWord, _i );
+                                               _pWord2 := _pWord2 + _i; _pWord := _pWord + _i;
+                                       end;
+                                       if _pWord < _pWordTail then begin
+                                               _i := _pWordTail - _pWord;
+                                               CopyMemory( _pWord2, _pWord, _i );
+                                               _pWord2 := _pWord2 + _i;
+                                       end;
+                                       // Trim the buffer to the number of bytes actually written
+                                       SetLength( _aWord2, _pWord2 - PChar( _aWord2 ) );
+
+                                       Result := _aWord2;
+                               end;
+
+                       else
+                               Result := _aWord;
+                       end;
+               end else begin
+                       Result := _aWord;
+               end;
+       end;
 const
-       KAKUJOSI = '\82ð' + #10 + '\82É' + #10 + '\82ª' + #10 + '\82Æ' + #10 + '\82©\82ç' +
-               #10 + '\82Å' + #10 + '\82Ö' + #10 + '\82æ\82è' + #10 + '\82Ü\82Å';
+       WHIRA_DELIMITER = '\82ð' + #10 + '\82É' + #10 + '\82ª' + #10 + '\82Æ' + #10 + '\82©\82ç'
+               + #10 + '\82Ö' + #10 + '\82æ\82è' + #10 + '\82Ü\82Å'+ #10 + '\82Å'
+               + #10 + '\82±\82±' + #10 + '\82»\82±' + #10 + '\82Ç\82±'
+               + #10 + '\82±\82ê' + #10 + '\82»\82ê' + #10 + '\82 \82ê' + #10 + '\82Ç\82ê'
+               + #10 + '\82±\82Ì' + #10 + '\82»\82Ì' + #10 + '\82 \82Ì' + #10 + '\82Ç\82Ì'
+               + #10 + '\82±\82¤' + #10 + '\82»\82¤' + #10 + '\82 \82 ' + #10 + '\82Ç\82¤'
+               + #10 + '\82±\82ñ\82È' + #10 + '\82»\82ñ\82È' + #10 + '\82 \82ñ\82È' + #10 + '\82Ç\82ñ\82È'
+               + #10 + '\82ê\82½' + #10 + '\82ê\82Ä' + #10 + '\82ê\82ê' + #10 + '\82ê\82ë'
+               + #10 + '\82ê\82é' + #10 + '\82ç\82ê\82é'
+               + #10 + '\82Å\82·' + #10 + '\82Ü\82·' + #10 + '\82Ü\82¹\82ñ'
+               + #10 + '\82Å\82µ\82½' + #10 + '\82Ü\82µ\82½'
+               + #10 + '\82·\82é' + #10 + '\82µ\82È\82¢' + #10 + '\82³\82ê\82é' + #10 + '\82³\82ê\82È\82¢'
+               ;
+       WKANJI_DELIMITER = '\93I' + #10 + '\90«' + #10 + '\8e®' + #10 + '\89»' + #10 + '\96@'
+               + #10 + '\95s' + #10 + '\96³' + #10 + '\94ñ' + #10 + '\94½'
+               ;
+       WHIRA_FINAL_DELIMITER = '\82Á\82½' + #10 + '\82Á\82Ä'
+               ;{
+               + #10 + '\82æ\82Á\82Ä' + #10 + '\82µ\82½\82ª\82Á\82Ä' + #10 + '\82È\82Ì\82Å'
+               + #10 + '\82¾\82©\82ç' + #10 + '\82Å\82·\82©\82ç'
+               + #10 + '\82Ü\82½'
+               + #10 + '\82µ\82©\82µ' + #10 + '\82¾\82ª' + #10 + '\82¯\82Ç' + #10 + '\82¯\82ê\82Ç'
+               + #10 + '\82â\82Í\82è' + #10 + '\82â\82Á\82Ï\82è'
+               + #10 + '\82Å\82µ' + #10 + '\82¾\82ë'
+               + #10 + '\82·\82é' + #10 + '\82µ\82È\82¢' + #10 + '\82µ\82½' + #10 + '\82µ\82È\82¢'
+               ;}
+       // '\81[' \82ð '\82\9f\82¡\82£\82¥\82§' \82É\81B
+       HA_LINE = '\82 \82©\82³\82½\82È\82Í\82Ü\82â\82ç\82í\82ª\82´\82¾\82Î\82Ï\82\9f\82ì';
+       HI_LINE = '\82¢\82«\82µ\82¿\82É\82Ð\82Ý\82è\82î\82¬\82\82Ñ\82Ò\82¡';
+       HU_LINE = '\82¤\82­\82·\82Â\82Ê\82Ó\82Þ\82ä\82é\82®\82Ô\82Õ\82£';
+       HE_LINE = '\82¦\82¯\82¹\82Ä\82Ë\82Ö\82ß\82ê\82ï\82°\82×\82Ø\82¥';
+       HO_LINE = '\82¨\82±\82»\82Æ\82Ì\82Ù\82à\82æ\82ë\82ð\82²\82Ú\82Û\82§';
+       KA_LINE = '\83A\83J\83T\83^\83i\83n\83}\83\84\83\89\83\8f\83K\83U\83_\83o\83p\83@\83\95\83\8e';
+       KI_LINE = '\83C\83L\83V\83`\83j\83q\83~\83\8a\83\90\83M\83W\83r\83s\83B';
+       KU_LINE = '\83E\83N\83X\83c\83k\83t\83\80\83\86\83\8b\83O\83u\83v\83D\83\94';
+       KE_LINE = '\83G\83P\83Z\83e\83l\83w\83\81\83\8c\83\91\83Q\83x\83y\83F\83\96';
+       KO_LINE = '\83I\83R\83\\83g\83m\83z\83\82\83\88\83\8d\83\92\83S\83{\83|\83H';
+       kKanji = [$80..$A0, $E0..$ff];
 begin
 
-       delimiter := TStringList.Create;
+       wHiraDelimiter  := TStringList.Create;
+       wHiraFinalDelimiter := TStringList.Create;
+       wKanjiDelimiter := TStringList.Create;
+       words := TStringList.Create;
        try
-               //*** \91¬\93x\83e\83X\83g\92\86
-               wordCount.Duplicates := dupIgnore;
-               wordCount.CaseSensitive := True;
-               wordCount.Capacity := 1000;
-               wordCount.Sorted := True;
-               //***
-
                mode := ModeWhite;
-               delimiter.Text := KAKUJOSI;
-               SetLength( aWord, 256 );
+{$IFNDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
+               wHiraDelimiter.Text := WHIRA_DELIMITER;
+               wHiraFinalDelimiter.Text := WHIRA_FINAL_DELIMITER;
+{$ENDIF}
+               wKanjiDelimiter.Text := WKANJI_DELIMITER;
                p                       := PChar( text );
                tail    := p + Length( text );
                last    := p;
 
                while p < tail do begin
-{$IFDEF BENCHMARK}
-                       QueryPerformanceCounter( t1 );
-{$ENDIF}
-                       delimited := False;
                        // \95\8e\9a\82Ì\83^\83C\83v\82ð\94»\95Ê
                        // \81¦\8bå\93Ç\93_\82Í ModeGraph \82É\82È\82é\82Ì\82Å\8cÂ\95Ê\82É\91Î\89\9e\82µ\82È\82­\82Ä\82à\82¢\82¢
-                       if p^ in kYofKanji then begin
+//                     if Byte(Byte( p^ ) - $a1) < $5e then begin
+                       if Byte( p^ ) in kKanji then begin
                                if p + 1 < tail then begin
                                        ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
                                        case ch of
-                                       $8140:                                                  newMode := ModeWhite;
+                                       // \83X\83y\81[\83X\82Å\92P\8cê\95ª\82¯\82¹\82¸\82É\8bl\82ß\82é
+                                       //$8140:                                                        newMode := ModeWhite;
                                        $8141..$824e:                           newMode := ModeWGraph;
                                        $824f..$8258:                           newMode := ModeWNum;
                                        $8260..$829a:                           newMode := ModeWAlpha;
@@ -435,66 +622,38 @@ begin
                                        $8340..$8396:                           newMode := ModeWKata;
                                        else                                                            newMode := ModeWKanji;
                                        end;
+                                       // '\81J\81K\81[' \82Í\95½\89¼\96¼\81A\82Ü\82½\82Í\83J\83^\83J\83i\82É\8aÜ\82Ü\82ê\82é
+                                       if (mode = ModeWHira) or (mode = ModeWKata) then
+                                               if (ch = $814a) or (ch = $814b) or (ch = $815b) then
+                                                       newMode := mode;
                                end else begin
                                        newMode := ModeWhite;
                                end;
 
                                chSize := 2;
-
-                               // \8bæ\90Ø\82è\82É\82È\82é\95\8e\9a\82ª\82 \82é\82©\8c\9f\8d¸\82·\82é
-                               if p + 3 < tail then begin      // 3 = delimiter \82Ì\8dÅ\91å\8e\9a\90\94 - 1
-                                       for i := 0 to delimiter.Count - 1 do begin
-                                               if CompareMem(
-                                                       p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
-                                                       delimited := True;
-                                                       chSize := Length( delimiter[ i ] );
-                                                       Break;
-                                               end;
-                                       end;
-                               end;
                        end else begin
-                               case p^ of
-                               #$0..#$20, #$7f:                                newMode := ModeWhite;
-                               '0'..'9':                                                               newMode := ModeNum;
-                               'a'..'z', 'A'..'Z':                     newMode := ModeAlpha;
-                               #$A6..#$DD:                                                     newMode := ModeHanKana;
-                               else                                                                            newMode := ModeGraph;
+                               newMode := Modes( CharMode1[ Byte( p^ ) ] );
+                               if (p^ = ' ') and (Ord( mode ) >= Ord( ModeWGraph )) then begin
+                                       // \8d¡\82Ü\82Å\93ú\96{\8cê\82Å\8d¡\83X\83y\81[\83X
+                                       // \92P\8cê\82ð\8cq\82°\82Ä\8cã\82Å\83X\83y\81[\83X\82ð\8bl\82ß\82é
+                                       // \81¦\94¼\8ap\83J\83i\82Í\92Ê\8fí\83X\83y\81[\83X\82Å\8bæ\90Ø\82é\82¾\82ë\82¤\82©\82ç\8bl\82ß\82È\82¢
+                                       newMode := mode;
                                end;
 
                                chSize := 1;
                        end;
-{$IFDEF BENCHMARK}
-                       QueryPerformanceCounter( t2 );  b1 := b1 + (t2 - t1);
-{$ENDIF}
 
-                       if (mode <> newMode) or delimited then begin
+                       if mode <> newMode then begin
 
                                // \95\8e\9a\82Ì\83^\83C\83v\82ª\95Ï\8dX\82³\82ê\82½
-                               // \82à\82µ\82­\82Í\8bæ\90Ø\82è\82É\82È\82é\95\8e\9a\82É\91\98\8bö\82µ\82½
                                if mode <> ModeWhite then begin
-{$IFDEF BENCHMARK}
-                                       QueryPerformanceCounter( t1 );
-{$ENDIF}
-                                       aWord := Copy( last, 0, p - last );     // \8c\83\92x
-//                                     SetLength( aWord, p - last );
-//                                     CopyMemory( PChar( aWord ), last, p - last );
-{$IFDEF BENCHMARK}
-                                       QueryPerformanceCounter( t2 );  b2 := b2 + (t2 - t1);
-{$ENDIF}
-                                       idx := wordCount.IndexOf( aWord );      // \8c\83\92x
-{$IFDEF BENCHMARK}
-                                       QueryPerformanceCounter( t1 );  b3 := b3 + (t1 - t2);
-{$ENDIF}
-                                       if idx < 0 then begin
-                                               countInfo := TWordCountInfo.Create;
-                                               wordCount.AddObject( aWord, countInfo );
-                                       end else begin
-                                               countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
-                                       end;
-                                       countInfo.WordCount := countInfo.WordCount + 1;
-{$IFDEF BENCHMARK}
-                                       QueryPerformanceCounter( t2 );  b4 := b4 + (t2 - t1);
-{$ENDIF}
+                                       SetLength( aWord, p - last );
+                                       CopyMemory( PChar( aWord ), last, p - last );
+
+                                       words.Text := changeMode( aWord, mode );
+
+                                       // \92P\8cê\93o\98^
+                                       addWord( wordCount, words );
                                end;
 
                                last := p;
@@ -506,18 +665,19 @@ begin
                end;    // while
 
                if mode <> ModeWhite then begin
-                       aWord := Copy( last, 0, p - last );
-                       idx := wordCount.IndexOf( aWord );
-                       if idx < 0 then begin
-                               countInfo := TWordCountInfo.Create;
-                               wordCount.AddObject( aWord, countInfo );
-                       end else begin
-                               countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
-                       end;
-                       countInfo.WordCount := countInfo.WordCount + 1;
+                       SetLength( aWord, p - last );
+                       CopyMemory( PChar( aWord ), last, p - last );
+
+                       words.Text := changeMode( aWord, mode );
+
+                       // \92P\8cê\93o\98^
+                       addWord( wordCount, words );
                end;
        finally
-               delimiter.Free;
+               words.Free;
+               wKanjiDelimiter.Free;
+               wHiraFinalDelimiter.Free;
+               wHiraDelimiter.Free;
        end;
 
 end;
@@ -533,15 +693,22 @@ function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
        begin
                info := Objects[ aWord ];
                if info = nil then
-                       Result := 0.4
+                       Result := 0.415
                else if info.NormalWord = 0 then
                        Result := 0.99
                else if info.ImportantWord = 0 then
                        Result := 0.01
-               else
-                       Result := ( info.ImportantWord / info.ImportantText ) /
-                               ((info.NormalWord * 2 / info.NormalText ) +
-                                (info.ImportantWord / info.ImportantText));
+               else if info.ImportantWord + info.NormalWord * 2 < 5 then
+                       Result := 0.5
+               else begin
+                       try
+                               Result := ( info.ImportantWord / info.ImportantText ) /
+                                       ((info.NormalWord * 2 / info.NormalText ) +
+                                        (info.ImportantWord / info.ImportantText));
+                       except
+               on EZeroDivide do Result := 0.99;
+                       end;
+               end;
        end;
 
 var
@@ -569,11 +736,15 @@ begin
                i := min( SAMPLE_COUNT, narray.Count );
                while i > 0 do begin
                        Dec( i );
+
                        s := s * Single( narray[ i ] );
                        q := q * (1 - Single( narray[ i ] ));
                end;
-
-               Result := s / (s + q);
+               try
+                       Result := s / (s + q);
+               except
+            Result := 0.5;
+               end;
        finally
                narray.Free;
        end;
@@ -593,18 +764,27 @@ function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
                if info = nil then
                        Result := 0.415
                else if info.ImportantWord = 0 then
-                       Result := 0.0001
+                       Result := 0.01
                else if info.NormalWord = 0 then
-                       Result := 0.9999
+                       Result := 0.99
                else
+               {
                        Result := ( info.ImportantWord / info.ImportantText ) /
                                ((info.NormalWord / info.NormalText ) +
                                 (info.ImportantWord / info.ImportantText));
+               }
+                       try
+                               Result := (info.ImportantWord * info.NormalText) /
+                                       (info.NormalWord * info.ImportantText +
+                                       info.ImportantWord * info.NormalText);
+                       except
+                               Result := 0.5;
+                       end;
        end;
 
        function f( cnt : Integer; n, mean : Single ) : Extended;
        const
-               k = 0.00001;
+               k = 0.001;
        begin
                Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
        end;
@@ -615,8 +795,7 @@ var
        mean                            : Extended;
        countInfo               : TWordCountInfo;
        i                                               : Integer;
-       normal                  : Extended;
-       important               : Extended;
+       P1, Q1{, R1}    : Extended;
        cnt                                     : Extended;
 begin
 
@@ -634,24 +813,128 @@ begin
        end;
        mean := mean / wordCount.Count;
 
-       cnt                             := 0;
-       normal          := 1;
-       important       := 1;
+       P1 := 1;
+       Q1 := 1;
        for i := 0 to wordCount.Count - 1 do begin
                countInfo       := TWordCountInfo( wordCount.Objects[ i ] );
                n                                               := f( countInfo.WordCount, narray[ i ], mean );
-               normal                  := normal * n;
-               important               := important * (1 - n);
-               if countInfo <> nil then
-                       cnt                                     := cnt + countInfo.WordCount;
+               P1 := P1 * ( 1 - n );
+               Q1 := Q1 * n;
        end;
+       cnt := wordCount.Count;
        if cnt = 0 then
                cnt := 1;
-       normal := 1 - Exp( Ln( normal ) * (1 / cnt) );
-       important := 1 - Exp( Ln( important ) * (1 / cnt) );
+       try
+               P1 := 1 - Power( P1, 1 / cnt );
+       except
+       end;
+       try
+               Q1 := 1 - Power( Q1, 1 / cnt );
+       except
+       end;
 
-       n := (important - normal+ 0.00001) / (important + normal + 0.00001);
-       Result := (1 + n) / 2;
+       if P1 + Q1 = 0 then begin
+               Result := 0.5
+       end else begin
+               n := (P1 - Q1) / (P1 + Q1);
+               Result := (1 + n) / 2;
+       end;
+
+end;
+
+//==============================
+// CalcGaryRobinsonFisher
+//==============================
+function TGikoBayesian.CalcGaryRobinsonFisher(
+       wordCount : TWordCount
+) : Extended;
+{
+       Scores the words in wordCount with Gary Robinson's Fisher (chi-square)
+       combining. Results near 1 lean "important", results near 0 lean
+       "normal", 0.5 is neutral. An empty wordCount returns 1.
+       Every wordCount.Objects[ i ] is read as a TWordCountInfo.
+}
+
+       // Estimated probability that aWord belongs to an important text.
+       function p( const aWord : string ) : Single;
+       var
+               info    : TWordInfo;
+       begin
+               info := Objects[ aWord ];
+               if info = nil then
+                       Result := 0.415         // word not in the dictionary
+               else if info.ImportantWord = 0 then
+                       Result := 0.01
+               else if info.NormalWord = 0 then
+                       Result := 0.99
+               else
+                       try
+                               Result := (info.ImportantWord * info.NormalText) /
+                                       (info.NormalWord * info.ImportantText +
+                                       info.ImportantWord * info.NormalText);
+                       except
+                               // zero denominator: stay neutral, same as CalcGaryRobinson
+                               Result := 0.5;
+                       end;
+       end;
+
+       // Blends n (weighted by cnt) with the mean probability (weighted by k).
+       function f( cnt : Integer; n, mean : Single ) : Extended;
+       const
+               k = 0.001;
+       begin
+               Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
+       end;
+
+       // Upper-tail chi-square probability Q( x2 | degree ) for an even degree:
+       //   exp( -m ) * sum( m^j / j!, j = 0 .. degree/2 - 1 ),  m = x2 / 2.
+       // Each term is built in log space so that a large m cannot zero the
+       // whole series through exp( -m ) underflow.
+       function prbx( x2, degree : Extended ) : Extended;
+       var
+               m, lnM, lnTerm  : Extended;
+               j, last         : Integer;
+       begin
+               Result := 1;
+               m := x2 / 2;
+               if m <= 0 then
+                       Exit;                   // statistic of 0: the whole tail
+               lnM    := Ln( m );
+               lnTerm := -m;
+               Result := Exp( lnTerm );
+               last   := Round( degree / 2 ) - 1;
+               for j := 1 to last do begin
+                       lnTerm := lnTerm + lnM - Ln( j );
+                       Result := Result + Exp( lnTerm );
+               end;
+               if Result > 1 then
+                       Result := 1;            // rounding may overshoot slightly
+       end;
+
+const
+       kMinProb = 1.0E-10;     // keeps Ln() away from exact 0 and 1
+var
+       n               : Extended;
+       narray          : array of Single;
+       mean            : Extended;
+       countInfo       : TWordCountInfo;
+       i               : Integer;
+       lnP, lnQ        : Extended;
+       P1, Q1          : Extended;
+begin
+
+       if wordCount.Count = 0 then begin
+               Result := 1;
+               Exit;
+       end;
+
+       SetLength( narray, wordCount.Count );
+       mean := 0;
+       for i := 0 to wordCount.Count - 1 do begin
+               narray[ i ] := p( wordCount[ i ] );
+               mean        := mean + narray[ i ];
+       end;
+       mean := mean / wordCount.Count;
+
+       // Accumulate ln( product ) directly: multiplying the raw probabilities
+       // can underflow to 0 on long texts, and Ln( 0 ) is an error.
+       lnP := 0;
+       lnQ := 0;
+       for i := 0 to wordCount.Count - 1 do begin
+               countInfo := TWordCountInfo( wordCount.Objects[ i ] );
+               n := f( countInfo.WordCount, narray[ i ], mean );
+               if n < kMinProb then
+                       n := kMinProb
+               else if n > 1 - kMinProb then
+                       n := 1 - kMinProb;
+               lnP := lnP + Ln( 1 - n );
+               lnQ := lnQ + Ln( n );
+       end;
+
+       // -2 * ln( product ) is compared against a chi-square law with
+       // 2 * Count degrees of freedom, so the product itself is used here,
+       // not its Count-th root.
+       P1 := 1 - prbx( -2 * lnP, 2 * wordCount.Count );
+       Q1 := 1 - prbx( -2 * lnQ, 2 * wordCount.Count );
+
+       Result := (1 + P1 - Q1) / 2;
 
 end;
 
@@ -661,29 +944,18 @@ end;
 function TGikoBayesian.Parse(
        const text                              : string;
        wordCount                                       : TWordCount;
-       algorithm                                       : TGikoBayesianAlgorithm = gbaGaryRonbinson
+       algorithm                                       : TGikoBayesianAlgorithm  //!< selects the Calc* routine whose score is returned after CountWord has run
 ) : Extended;
-{$IFDEF BENCHMARK}
-var
-       t1, t2  : Int64;
-{$ENDIF}
 begin
 
-{$IFDEF BENCHMARK}
-       QueryPerformanceCounter( t1 );
-{$ENDIF}
        CountWord( text, wordCount );
-{$IFDEF BENCHMARK}
-       QueryPerformanceCounter( t2 );  b5 := b5 + (t2 - t1);
-{$ENDIF}
        case algorithm of
        gbaPaulGraham:          Result := CalcPaulGraham( wordCount );
-       gbaGaryRonbinson:       Result := CalcGaryRobinson( wordCount );
+       gbaGaryRobinson:        Result := CalcGaryRobinson( wordCount );
+       gbaGaryRobinsonFisher:  // Fisher variant is scored by its own routine
+                                                                               Result := CalcGaryRobinsonFisher( wordCount );
        else                                                    Result := 0;
        end;
-{$IFDEF BENCHMARK}
-       QueryPerformanceCounter( t1 );  b6 := b6 + (t1 - t2);
-{$ENDIF}
 
 end;
 
@@ -697,18 +969,18 @@ var
        aWord                   : string;
        wordinfo        : TWordInfo;
        countinfo       : TWordCountInfo;
-       i                       : Integer;
+       i                                       : Integer;
 begin
 
        for i := 0 to wordCount.Count - 1 do begin
                aWord := wordCount[ i ];
                wordinfo := Objects[ aWord ];
+               countinfo := TWordCountInfo( wordCount.Objects[ i ] );
                if wordinfo = nil then begin
                        wordinfo := TWordInfo.Create;
                        Objects[ aWord ] := wordinfo;
                end;
 
-               countinfo := TWordCountInfo( wordCount.Objects[ i ] );
                if isImportant then begin
                        wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
                        wordinfo.ImportantText := wordinfo.ImportantText + 1;
@@ -741,11 +1013,15 @@ begin
 
                countinfo := TWordCountInfo( wordCount.Objects[ i ] );
                if isImportant then begin
-                       wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
-                       wordinfo.ImportantText := wordinfo.ImportantText - 1;
+                       if wordInfo.ImportantText > 0 then begin
+                               wordinfo.ImportantText := wordinfo.ImportantText - 1;
+                               wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
+                       end;
                end else begin
-                       wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
-                       wordinfo.NormalText := wordinfo.NormalText - 1;
+                       if wordinfo.NormalText > 0 then begin
+                               wordinfo.NormalText := wordinfo.NormalText - 1;
+                               wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
+                       end;
                end;
        end;