\file GikoBayesian.pas
\brief \83x\83C\83W\83A\83\93\83t\83B\83\8b\83^
-$Id: GikoBayesian.pas,v 1.5 2004/10/21 03:46:57 yoffy Exp $
+$Id: GikoBayesian.pas,v 1.9 2004/10/27 00:10:12 yoffy Exp $
}
interface
\brief \83t\83B\83\8b\83^\83A\83\8b\83S\83\8a\83Y\83\80
************************************************************}
TGikoBayesianAlgorithm =
- (gbaPaulGraham, gbaGaryRonbinson{, gbaGaryRonbinsonFisher});
+ (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
{!***********************************************************
\brief \83x\83C\83W\83A\83\93\83t\83B\83\8b\83^
}
function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
-// function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
+ {!
+ \brief GaryRobinson-Fisher \96@\82É\8aî\82Ã\82¢\82Ä\95¶\8fÍ\82Ì\92\8d\96Ú\93x\82ð\8c\88\92è\82µ\82Ü\82·
+ \return \95¶\8fÍ\82Ì\92\8d\96Ú\93x (\92\8d\96Ú\82É\92l\82µ\82È\82¢ 0.0\81`1.0 \92\8d\96Ú\82·\82×\82«)
+ }
+ function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
{!
\brief \95¶\8fÍ\82ð\89ð\90Í
function Parse(
const text : string;
wordCount : TWordCount;
- algorithm : TGikoBayesianAlgorithm = gbaGaryRonbinson
+ algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
) : Extended;
{!
0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
);
$8340..$8396: newMode := ModeWKata;
else newMode := ModeWKanji;
end;
+ // '\81J\81K\81[' \82Í\95½\89¼\96¼\81A\82Ü\82½\82Í\83J\83^\83J\83i\82É\8aÜ\82Ü\82ê\82é
+ if (mode = ModeWHira) or (mode = ModeWKata) then
+ if (ch = $814a) or (ch = $814b) or (ch = $815b) then
+ newMode := mode;
end else begin
newMode := ModeWhite;
end;
begin
info := Objects[ aWord ];
if info = nil then
- Result := 0.4
+ Result := 0.415
else if info.NormalWord = 0 then
Result := 0.99
else if info.ImportantWord = 0 then
Result := 0.01
+ else if info.ImportantWord + info.NormalWord * 2 < 5 then
+ Result := 0.5
else
Result := ( info.ImportantWord / info.ImportantText ) /
((info.NormalWord * 2 / info.NormalText ) +
i := min( SAMPLE_COUNT, narray.Count );
while i > 0 do begin
Dec( i );
+
s := s * Single( narray[ i ] );
q := q * (1 - Single( narray[ i ] ));
end;
if info = nil then
Result := 0.415
else if info.ImportantWord = 0 then
- Result := 0.0001
+ Result := 0.01
else if info.NormalWord = 0 then
- Result := 0.9999
+ Result := 0.99
else
Result := ( info.ImportantWord / info.ImportantText ) /
((info.NormalWord / info.NormalText ) +
mean : Extended;
countInfo : TWordCountInfo;
i : Integer;
+ P1, Q1, R1 : Extended;
+ cnt : Extended;
+begin
+
+ if wordCount.Count = 0 then begin
+ Result := 1;
+ Exit;
+ end;
+
+ SetLength( narray, wordCount.Count );
+ mean := 0;
+ for i := 0 to wordCount.Count - 1 do begin
+ n := p( wordCount[ i ] );
+ narray[ i ] := n;
+ mean := mean + n;
+ end;
+ mean := mean / wordCount.Count;
+
+ cnt := 0;
+ P1 := 0;
+ Q1 := 0;
+ for i := 0 to wordCount.Count - 1 do begin
+ countInfo := TWordCountInfo( wordCount.Objects[ i ] );
+ n := f( countInfo.WordCount, narray[ i ], mean );
+ if countInfo <> nil then
+ cnt := cnt + countInfo.WordCount;
+ P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
+ Q1 := Q1 + Ln( n ) * countInfo.WordCount;
+ end;
+ if cnt = 0 then
+ cnt := 1;
+ P1 := 1 - Exp( P1 * (1 / cnt) );
+ Q1 := 1 - Exp( Q1 * (1 / cnt) );
+
+ if P1 + Q1 = 0 then begin
+ Result := 0.5
+ end else begin
+ n := (P1 - Q1) / (P1 + Q1);
+ Result := (1 + n) / 2;
+ end;
+
+end;
+
+//==============================
+// CalcGaryRobinsonFisher
+//==============================
+function TGikoBayesian.CalcGaryRobinsonFisher(
+ wordCount : TWordCount
+) : Extended;
+
+ function p( const aWord : string ) : Single;
+ var
+ info : TWordInfo;
+ begin
+ info := Objects[ aWord ];
+ if info = nil then
+ Result := 0.415
+ else if info.ImportantWord = 0 then
+ Result := 0.01
+ else if info.NormalWord = 0 then
+ Result := 0.99
+ else
+ Result := info.ImportantWord /
+ (info.ImportantWord + info.NormalWord *
+ info.ImportantText / info.NormalText);
+ end;
+
+ function f( cnt : Integer; n, mean : Single ) : Extended;
+ const
+ k = 0.00001;
+ begin
+ Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
+ end;
+
+ function prbx( x2, degree : Extended ) : Extended;
+ var
+ m : Extended;
+ sum : Extended;
+ term : Extended;
+ i : extended;
+ begin
+
+ m := x2 / 2;
+ sum := exp( -m );
+ term := -m;
+
+ i := 1;
+ while i < (degree / 2 - 1) do begin
+ term := term + ln( m / i );
+ sum := sum + exp( term );
+ i := i + 1;
+ end;
+
+ if sum < 1 then
+ Result := sum
+ else
+ Result := 1.0;
+
+ end;
+
+var
+ n : Extended;
+ narray : array of Single;
+ mean : Extended;
+ countInfo : TWordCountInfo;
+ i : Integer;
normal : Extended;
important : Extended;
+ P1, Q1 : Extended;
cnt : Extended;
begin
end;
mean := mean / wordCount.Count;
- cnt := 0;
- normal := 1;
- important := 1;
+ cnt := 0;
+(*
+ P1 := 1;
+ Q1 := 1;
+(*)
+ P1 := 0;
+ Q1 := 0;
+//*
for i := 0 to wordCount.Count - 1 do begin
countInfo := TWordCountInfo( wordCount.Objects[ i ] );
n := f( countInfo.WordCount, narray[ i ], mean );
- normal := normal * n;
- important := important * (1 - n);
if countInfo <> nil then
- cnt := cnt + countInfo.WordCount;
+ cnt := cnt + countInfo.WordCount;
+(*
+ P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
+ Q1 := Q1 + Ln( n ) * countInfo.WordCount;
+(*)
+ P1 := P1 + Ln( 1 - n );
+ Q1 := Q1 + Ln( n );
+//*)
end;
if cnt = 0 then
cnt := 1;
- normal := 1 - Exp( Ln( normal ) * (1 / cnt) );
- important := 1 - Exp( Ln( important ) * (1 / cnt) );
-
- n := (important - normal+ 0.00001) / (important + normal + 0.00001);
- Result := (1 + n) / 2;
+//(*
+ P1 := prbx( -2 * P1, 2 * cnt );
+ Q1 := prbx( -2 * Q1, 2 * cnt );
+(*)
+ P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
+ Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
+//*)
+ if P1 + Q1 = 0 then begin
+ Result := 0.5
+ end else begin
+ Result := (1 + Q1 + P1) / 2;
+ end;
end;
function TGikoBayesian.Parse(
const text : string;
wordCount : TWordCount;
- algorithm : TGikoBayesianAlgorithm = gbaGaryRonbinson
+ algorithm : TGikoBayesianAlgorithm
) : Extended;
begin
CountWord( text, wordCount );
case algorithm of
gbaPaulGraham: Result := CalcPaulGraham( wordCount );
- gbaGaryRonbinson: Result := CalcGaryRobinson( wordCount );
+ gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
+ gbaGaryRobinsonFisher:
+ Result := CalcGaryRobinsonFisher( wordCount );
else Result := 0;
end;
countinfo := TWordCountInfo( wordCount.Objects[ i ] );
if isImportant then begin
- wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
- wordinfo.ImportantText := wordinfo.ImportantText - 1;
+ if wordInfo.ImportantText > 0 then begin
+ wordinfo.ImportantText := wordinfo.ImportantText - 1;
+ wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
+ end;
end else begin
- wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
- wordinfo.NormalText := wordinfo.NormalText - 1;
+ if wordinfo.NormalText > 0 then begin
+ wordinfo.NormalText := wordinfo.NormalText - 1;
+ wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
+ end;
end;
end;