From 7edfa0f09b28c926f42ddf9402cddcf6a8f2501d Mon Sep 17 00:00:00 2001 From: zako Date: Wed, 7 Jan 2015 20:52:19 +0900 Subject: [PATCH] =?utf8?q?NG=E3=83=AF=E3=83=BC=E3=83=89=E3=81=A7=E7=89=B9?= =?utf8?q?=E5=AE=9A=E3=81=AE=E6=AD=A3=E8=A6=8F=E8=A1=A8=E7=8F=BE=E3=82=92?= =?utf8?q?=E4=BD=BF=E7=94=A8=E3=81=99=E3=82=8B=E3=81=A8=E3=83=95=E3=83=AA?= =?utf8?q?=E3=83=BC=E3=82=BA=E3=81=99=E3=82=8B=E4=B8=8D=E5=85=B7=E5=90=88?= =?utf8?q?=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- bmRegExp/Readme.txt | 5 +- bmRegExp/bmregexp.pas | 4142 +++++++++++++++++++++++++++++++++++++++++++++++++ gikoNavi.res | Bin 4316 -> 4316 bytes 3 files changed, 4143 insertions(+), 4 deletions(-) create mode 100644 bmRegExp/bmregexp.pas diff --git a/bmRegExp/Readme.txt b/bmRegExp/Readme.txt index 33093e5..f4b9a52 100644 --- a/bmRegExp/Readme.txt +++ b/bmRegExp/Readme.txt @@ -1,11 +1,8 @@ ³‹K•\Œ»•¶Žš—ñ‘€ìƒ‰ƒCƒuƒ‰ƒŠ‚ɂ‚¢‚Ä -‚±‚̃tƒHƒ‹ƒ_‚̃\[ƒXƒR[ƒh‚Í“Y•t‚µ‚Ä‚¢‚Ü‚¹‚ñB -ˆÈ‰º‚Ì‚Æ‚±‚ë‚©‚çƒ_ƒEƒ“ƒ[ƒh‚·‚é•K—v‚ª‚ ‚è‚Ü‚·B +ƒMƒRƒiƒr‚ł͉º‹Lƒy[ƒW‚ÅŒöŠJ‚³‚ê‚Ä‚¢‚é REXP017.LZH ‚ðC³‚µ‚ÄŽg—p‚µ‚Ä‚¢‚Ü‚·B http://homepage1.nifty.com/bmonkey/delphi/delphi.html -ƒMƒRƒiƒr‚Å‚Í REXP017.LZH ‚ðŽg—p‚µ‚Ä‚¢‚Ü‚·B - ”ñí‚É•Ö—˜‚ȃ‰ƒCƒuƒ‰ƒŠ‚ðŒöŠJ‚µ‚Ä‚¢‚ç‚Á‚µ‚á‚é•õ“‡—YŽ¡‚³‚ñ‚ÉŠ´ŽÓŠ´ŒƒI diff --git a/bmRegExp/bmregexp.pas b/bmRegExp/bmregexp.pas new file mode 100644 index 0000000..66886e9 --- /dev/null +++ b/bmRegExp/bmregexp.pas @@ -0,0 +1,4142 @@ +unit bmRegExp; +{****************************************************************************** +ƒ^ƒCƒgƒ‹ F³‹K•\Œ»‚ðŽg‚Á‚½•¶Žš—ñ’Tõ/‘€ìƒRƒ“ƒ|[ƒlƒ“ƒgWver0.17 +ƒ†ƒjƒbƒg–¼ FbmRegExp.pas +ƒo[ƒWƒ‡ƒ“ Fversion 0.17 +“ú•t F2001/09/15 +“®ìŠm”FŠÂ‹« F Windows 98 + Borland Delphi6 Japanese Personal edition +ìŽÒ F •õ“‡ bmonkey —YŽ¡ ggb01164@nifty.ne.jp +•ÏX—š—ð F 0.17 ƒoƒOC³ 2001/09/15 + F EMP3‚Ì–‚pŽt‚³‚ñ‚É‹³‚¦‚Ä’¸‚¢‚½ƒƒ‚ƒŠƒŠ[ƒN‚̏C³‚ð“K—pB + F E‚¨‚©‚Û‚ñ‚³‚ñ‚É‹³‚¦‚Ä’¸‚¢‚½ƒƒ‚ƒŠƒŠ[ƒN‚̏C³‚ð“K—pB 
+ F Ú×‚Í“¯«‚Ìchangelog.html‚ðŽQÆB + F 0.16 ‘æ“ñŽŸ ˆê”ÊŒöŠJ 1998/03/07 + F version 0.15 -> version 0.16 + F ETGrep‚ʼnp‘啶Žš/¬•¶Žš‚ª³‚µ‚­ŒŸõ‚Å‚«‚È‚©‚Á‚½ƒoƒO‚ðC³B + F EŠ¿Žš‚̃Lƒƒƒ‰ƒNƒ^ƒNƒ‰ƒXŽw’è([ˆŸ-ê¤]‚È‚Ç)‚̃oƒO‚ðC³B + F EDelphi3, C++Builder1‚ɑΉž + F Eƒ†ƒjƒbƒgƒtƒ@ƒCƒ‹–¼‚ðRegExp.pas‚©‚çbmRegExp.pas‚ɕύX + F EƒRƒ“ƒpƒCƒ‰‚ÌŒµ‚µ‚­‚È‚Á‚½Œ^ƒ`ƒFƒbƒN‚ɑΉž + F EMBUtils.pas‚ðŽg‚í‚È‚¢‚悤‚ɕύXB + F 0.15B ƒoƒOC³ADelphi3AC++Builder1‘Ήž + F 0.15 ˆê”ÊŒöŠJ +Žå—vƒNƒ‰ƒX F TGrep, TAWKStr +Œp³ŠÖŒW F TObject + +******************************************************************************* +Žg—p•û–@ Fƒwƒ‹ƒvƒtƒ@ƒCƒ‹‚ƃTƒ“ƒvƒ‹ƒvƒƒWƒFƒNƒg‚ðŽQÆ‚Ì‚±‚Æ +•â‘«à–¾ F + +’è‹`Œ^ F + +******************************************************************************} + +interface + +uses + SysUtils, Classes, Windows, Forms +{$IFDEF DEBUG} + ,OutLine +{$ENDIF} + ; + +type +{ -========================== —áŠOƒNƒ‰ƒX =====================================-} +{TREParser ‚ªo‚·—áŠOB + ErrorPos ‚É‚æ‚Á‚āA³‹K•\Œ»•¶Žš—ñ‚̉½•¶Žš–Ú‚Å—áŠO‚ª”­¶‚µ‚½‚©‚ðŽ¦‚·B} + ERegExpParser = class(Exception) + public + ErrorPos: Integer; + constructor Create(const Msg: string; ErrorPosition: Word); + end; +{-============================= ====================-} +{ ‚QƒoƒCƒg•¶ŽšŒ^} + WChar_t = Word; + +{ ‚QƒoƒCƒg•¶ŽšŒ^‚ւ̃|ƒCƒ“ƒ^Œ^} + PWChar_t = ^WChar_t; + +{ WChar_tŒ^ ‚Q‚‚Ԃñ‚ÌŒ^} + DoubleWChar_t = Integer; + +{ -====================== •¶Žš—ñ‘€ìŠÖ” =====================================-} + {à–¾ F ‚P‚Ui”‚ð•\‚·•¶Žš‚ðŽó‚¯Žæ‚èA®”‚É‚µ‚Ä•Ô‚·B + ˆø” F WCh: WChar_t; 16i”‚ð•\‚·1ƒoƒCƒg•¶Žš [0-9a-fA-F] + + •Ô‚è’lF ³íŽžF 0 <= result <= 15 + ˆÙíŽžF -1} + function HexWCharToInt(WCh: WChar_t): Integer; + + {à–¾ F ‚Wi”‚ð•\‚·•¶Žš‚ðŽó‚¯Žæ‚èA®”‚É‚µ‚Ä•Ô‚·B + ˆø” F WCh: WChar_t; 8i”‚ð•\‚·1ƒoƒCƒg•¶Žš [0-7] + + •Ô‚è’lF ³íŽžF 0 <= result <= 7 + ˆÙíŽžF -1} + function OctWCharToInt(WCh: WChar_t): Integer; + + {à–¾ F 16i”•\‹L‚Ì•¶Žš—ñ‚ðWordŒ^‚̐”’l‚É•ÏŠ·‚·‚éB + ˆø” F Str: String 
•ÏŠ·Œ³‚Ì•¶Žš—ñ + Index: Integer ˆø”Str‚ÌIndex”Ԗڂ̃oƒCƒgˆÊ’u‚©‚ç•ÏŠ·‚ðŽn‚ß‚éB + •›ì—pF ˆ—‚µ‚½ƒoƒCƒg”‚¾‚¯Index‚ªƒCƒ“ƒNƒŠƒƒ“ƒg‚³‚ê‚éB + •Ô‚è’lF •¶Žš—ñ‚ª•\‚·WordŒ^‚Ì’l} + function HexStrToInt(const Str: String; var Index: Integer): Word; + + {à–¾ F ‚Wi”•\‹L‚Ì•¶Žš—ñ‚ðWordŒ^‚̐”’l‚É•ÏŠ·‚·‚éB + ˆø” F Str: String •ÏŠ·Œ³‚Ì•¶Žš—ñ + Index: Integer ˆø”Str‚ÌIndex”Ԗڂ̃oƒCƒgˆÊ’u‚©‚ç•ÏŠ·‚ðŽn‚ß‚éB + •›ì—pF ˆ—‚µ‚½ƒoƒCƒg”‚¾‚¯Index‚ªƒCƒ“ƒNƒŠƒƒ“ƒg‚³‚ê‚éB + •Ô‚è’lF •¶Žš—ñ‚ª•\‚·WordŒ^‚Ì’l} + function OctStrToInt(const Str: String; var Index: Integer): Word; + + {à–¾ F ˆø”Str‚©‚ç‚P•¶Žš“ǂݏo‚·B + “®ì F ˆø”Str ‚Ì’†‚̈ø”Index”Ԗڂ̃oƒCƒgˆÊ’u‚©‚ç‚P•¶Žš“¾‚āAIndex‚𑝂₷B + ˆø” F Str: String; ‚QƒoƒCƒg•¶ŽšƒR[ƒh‚ðŠÜ‚ñ‚¾String + Index: Integer; •¶Žš‚ð“ǂݏo‚·ˆÊ’u‚̐擪‚©‚ç‚̃oƒCƒg” + •Ô‚è’lF “ǂݏo‚µ‚½ WChar_tŒ^‚Ì’l + •›ì—pF + ’ˆÓ F Index‚ª•¶Žš—ñ‚Ì’·‚³‚æ‚è‚·‚Å‚É’·‚¢ê‡‚͏í‚É WChType.Null‚ð•Ô‚µAIndex‚𑝂₳‚È‚¢B + ‚‚܂èAIndex‚͍őå‚Å‚à Length(Str)+1 ‚Å‚ ‚éB} + function GetWChar(const Str: String;var Index: Integer): WChar_t; + + {‹@”\F GetWCharƒƒ\ƒbƒh‚É‚æ‚Á‚Đi‚ñ‚¾ Index‚ð‚P•¶Žš•ª–ß‚·(1`‚QƒoƒCƒg) + ’ˆÓF ƒkƒ‹EƒLƒƒƒ‰ƒNƒ^(GetWChar‚Ì•Ô‚è’l WChType.Null)‚ð–ß‚·‚±‚Æ‚Í‚Å‚«‚È‚¢B} + procedure UnGetWChar(const Str: String; var Index: Integer); + + + {‹@”\F GetWChar‚ÌPCharŒ^ƒo[ƒWƒ‡ƒ“} + function PCharGetWChar(var pText: PChar): WChar_t; + + {‹@”\F WChar_tŒ^‚Ì’l‚ðStringŒ^‚Ö•ÏŠ·‚·‚éB} + function WCharToStr(WCh: WChar_t): String; + + {‹@”\F '\' ‚Å ˆø—p‚³‚ꂽƒLƒƒƒ‰ƒNƒ^‚𓾂éB \n, \t \\ ... 
+ ’ˆÓF Index‚Í'\'‚ÌŽŸ‚Ì•¶Žš‚ðŽw‚µ‚Ä‚¢‚é‚Æ‚·‚éB} + function GetQuotedWChar(const Str: String; var Index: Integer): WChar_t; + + + {à–¾ F FS:WChar_t‚ð‹æØ‚蕶Žš‚Æ‚µ‚āAƒoƒCƒgˆÊ’uIndex‚©‚çŽn‚Ü‚éƒg[ƒNƒ“‚ð‚P‚•Ԃ·B + ˆø” F Str: String + Index: Integer ˆø”Str‚ÌIndex”Ԗڂ̃oƒCƒgˆÊ’u‚©‚ç•ÏŠ·‚ðŽn‚ß‚éB + •Ô‚è’lF FS‚Å‹æØ‚ç‚ꂽAƒoƒCƒgˆÊ’uIndex‚©‚çŽn‚Ü‚éƒg[ƒNƒ“} + function WCharGetToken(const Str: String; var Index: Integer; FS: WChar_t): String; + + + {à–¾ F ˆø”Str’†‚̃ƒ^ƒLƒƒƒ‰ƒNƒ^‚É'\'‚ð‚‚¯‚éB + ˆø” F Str: String + •Ô‚è’lF ƒƒ^ƒLƒƒƒ‰ƒNƒ^‚Ì‘O‚É'\'‚ª‚‚¢‚½Str} + function QuoteMetaWChar(Str: String): String; + +const + CONST_DOLLAR = $24; // '$' + CONST_LPAR = $28; // '(' + CONST_RPAR = $29; // ')' + CONST_STAR = $2A; // '*' + CONST_PLUS = $2B; // '+' + CONST_DOT = $2E; // '.' + CONST_QMARK = $3F; // '?' + CONST_VL = $7C; // '|' + + CONST_LBRA = $5B; // '[' + CONST_RBRA = $5D; // ']' + CONST_CARET = $5E; // '^' + CONST_YEN = $5C; // '\' + CONST_MINUS = $2D; // '-' + + CONST_b = $62; // 'b' + CONST_r = $72; // 'r' + CONST_n = $6E; // 'n' + CONST_t = $74; // 't' + CONST_x = $78; // 'x' + + CONST_BS = $08; // BackSpace + CONST_CR = $0D; // Carriage Return + CONST_LF = $0A; // Line Feed + CONST_TAB = $09; // TAB + + CONST_ANP = $26; // '&' + + CONST_NULL = $0000; + + METACHARS: Array[0..11] of WChar_t = (CONST_CARET, + CONST_LPAR, + CONST_VL, + CONST_RPAR, + CONST_PLUS, + CONST_STAR, + CONST_QMARK, + CONST_DOT, + CONST_LBRA, + CONST_RBRA, + CONST_DOLLAR, + CONST_YEN); + + CONST_EMPTY = $FFFF; {TNFA, TDFAó‘Ô•\‚Łu•¶Žš‚ª‚È‚¢v‚±‚Æ‚ð•\‚·ƒR[ƒh‚Æ‚µ‚ÄŽg‚¤} + CONST_LINEHEAD = $FFFD; {•¶“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^'^'‚ð•\‚·•¶ŽšƒR[ƒh‚Æ‚µ‚ÄŽg‚¤B} + CONST_LINETAIL = $FFFE; {•¶”öƒƒ^ƒLƒƒƒ‰ƒNƒ^'$'‚ð•\‚·•¶ŽšƒR[ƒh‚Æ‚µ‚ÄŽg‚¤B} + + REFuzzyWChars: array [0..144] of String = + ('‚`,‚,A,a', + '‚a,‚‚,B,b', + '‚b,‚ƒ,C,c', + '‚c,‚„,D,d', + '‚d,‚…,E,e', + '‚e,‚†,F,f', + '‚f,‚‡,G,g', + '‚g,‚ˆ,H,h', + '‚h,‚‰,I,i', + '‚i,‚Š,J,j', + '‚j,‚‹,K,k', + '‚k,‚Œ,L,l', + '‚l,‚,M,m', + '‚m,‚Ž,N,n', + '‚n,‚,O,o', 
+ '‚o,‚,P,p', + '‚p,‚‘,Q,q', + '‚q,‚’,R,r', + '‚r,‚“,S,s', + '‚s,‚”,T,t', + '‚t,‚•,U,u', + '‚u,‚–,V,v', + '‚v,‚—,W,w', + '‚w,‚˜,X,x', + '‚x,‚™,Y,y', + '‚y,‚š,Z,z', + '0,‚O,—ë', + '1,‚P,ˆê,‡@,‡T,ˆë', + '2,‚Q,“ñ,‡A,‡U,“ó', + '3,‚R,ŽO,‡B,‡V,ŽQ', + '4,‚S,Žl,‡C,‡W', + '5,‚T,ŒÜ,‡D,‡X,ŒÞ', + '6,‚U,˜Z,‡E,‡Y', + '7,‚V,Žµ,‡F,‡Z', + '8,‚W,”ª,‡G,‡[', + '9,‚X,‹ã,‡H,‡\', + '"@"," "', + '!,I', + '"""",h', + '#,”', + '$,', + '%,“', + '&,•', + ''',f', + '(,i', + '),j', + '*,–', + '+,{', + '[,`,°,', { ’·‰¹‹L†‚́A''ƒkƒ‹‚Æ‚àˆê’v‚³‚¹‚é} + '-,[,|,`,°', + '¥,E', + '/,^', + ':,F', + ';,G', + '<,ƒ', + '=,', + '>,„', + '?,H', + '@,—', + '[,m,k', + '\,', + '],n,l', + '^,O', + '_,Q', + '{,o', + '|,b', + '},p', + '~,P', + '",",¤,A,C', + '¡,.,B,D', + 'u,w,¢', + 'v,x,£', + '‚ñ,ƒ“,Ý', + '‚ª,ƒK,¶Þ,‚©J,ƒJJ', + '‚¬,ƒM,·Þ,‚«J,ƒLJ', + '‚®,ƒO,¸Þ,‚­J,ƒNJ', + '‚°,ƒQ,¹Þ,‚¯J,ƒPJ', + '‚²,ƒS,ºÞ,‚±J,ƒRJ', + '‚´,ƒU,»Þ,‚³J,ƒTJ', + '‚¶,ƒW,¼Þ,‚µJ,ƒVJ,‚À,ƒa,ÁÞ,‚¿J,ƒ`J', + '‚¸,ƒY,½Þ,ƒXJ,ƒXJ,‚Ã,ƒd,ÂÞ,‚J,ƒcJ', + '‚º,ƒ[,¾Þ,‚¹J,ƒZJ', + '‚¼,ƒ],¿Þ,‚»J,ƒ\J', + '‚¾,ƒ_,ÀÞ,‚½J,ƒ^J', + '‚Å,ƒf,ÃÞ,‚āJ,ƒeJ', + '‚Ç,ƒh,ÄÞ,‚ƁJ,ƒgJ', + '‚Î,ƒo,ÊÞ,‚́J,ƒnJ,ƒ”ƒ@,‚¤J‚Ÿ,ƒEJƒ@,³Þ§', + '‚Ñ,ƒr,ËÞ,‚ЁJ,ƒqJ,ƒ”ƒB,‚¤J‚¡,ƒEJƒB,³Þ¨', + '‚Ô,ƒu,ÌÞ,‚ӁJ,ƒtJ,ƒ”,ƒEJ,‚¤J,³Þ', + '‚×,ƒx,ÍÞ,‚ցJ,ƒwJ,ƒ”ƒF,‚¤J‚¥,ƒEJƒF,³Þª', + '‚Ú,ƒ{,ÎÞ,‚فJ,ƒzJ,ƒ”ƒH,‚¤J‚§,ƒEJƒH,³Þ«', + '‚Ï,ƒp,Êß,‚́K,ƒnK', + '‚Ò,ƒs,Ëß,‚ЁK,ƒqK', + '‚Õ,ƒv,Ìß,‚ӁK,ƒtK', + '‚Ø,ƒy,Íß,‚ցK,ƒwK', + '‚Û,ƒ|,Îß,‚فK,ƒzK', + '‚ ,ƒA,±,‚Ÿ,ƒ@,§', + '‚¢,ƒC,²,‚¡,ƒB,¨', + '‚¤,ƒE,³,‚£,ƒD,©', + '‚¦,ƒG,´,‚¥,ƒF,ª', + '‚¨,ƒI,µ,‚§,ƒH,«', + '‚©,ƒJ,¶', + '‚«,ƒL,·', + '‚­,ƒN,¸', + '‚¯,ƒP,¹', + '‚±,ƒR,º', + '‚³,ƒT,»', + '‚µ,ƒV,¼', + '‚·,ƒX,½', + '‚¹,ƒZ,¾', + '‚»,ƒ\,¿', + '‚½,ƒ^,À', + '‚¿,ƒ`,Á', + '‚Â,ƒc,Â,‚Á,ƒb,¯', + '‚Ä,ƒe,Ã', + '‚Æ,ƒg,Ä', + '‚È,ƒi,Å', + '‚É,ƒj,Æ', + '‚Ê,ƒk,Ç', + '‚Ë,ƒl,È', + '‚Ì,ƒm,É', + '‚Í,ƒn,Ê', + '‚Ð,ƒq,Ë', + '‚Ó,ƒt,Ì', + '‚Ö,ƒw,Í', + '‚Ù,ƒz,Î', + '‚Ü,ƒ},Ï', + '‚Ý,ƒ~,Ð', + '‚Þ,ƒ€,Ñ', + '‚ß,ƒ,Ò', + 
'‚à,ƒ‚,Ó', + '‚â,ƒ„,Ô,‚á,ƒƒ,¬', + '‚ä,ƒ†,Õ,‚ã,ƒ…,­', + '‚æ,ƒˆ,Ö,‚å,ƒ‡,®', + '‚ç,ƒ‰,×', + '‚è,ƒŠ,Ø', + '‚é,ƒ‹,Ù', + '‚ê,ƒŒ,Ú', + '‚ë,ƒ,Û', + '‚í,ƒ,Ü,‚¤‚Ÿ,ƒEƒ@,³§', + 'ƒ,‚î,‚¤‚¡,ƒEƒB,³¨', + 'ƒ‘,‚ï,‚¤‚¥,ƒEƒF,³ª', + '‚ð,ƒ’,¦,‚¤‚§,ƒEƒH,³«', + 'Þ,J', + 'ß,K'); {‘÷“_A”¼‘÷“_‚Í‚±‚̈ʒu‚É‚È‚¢‚Æ h‚ªh¨h¶Þh‚É•ÏŠ·‚³‚ê‚È‚¢B} + +type +{ -============================= TREScanner Class ==================================-} + { •¶Žš‚͈̔͂ð•\‚·Œ^B} + RECharClass_t = record + case Char of + #0: (StartChar: WChar_t; EndChar: WChar_t); + #1: (Chars: DoubleWChar_t); + end; + +const + CONST_EMPTYCharClass: RECharClass_t = ( StartChar: CONST_EMPTY; + EndChar: CONST_EMPTY); + +type + + { RECharClass_t‚ւ̃|ƒCƒ“ƒ^Œ^} + REpCharClass_t = ^RECharClass_t; + + {ƒg[ƒNƒ“‚ÌŽí—Þ‚ð•\‚·Œ^ } + REToken_t = ( retk_Char, {’ʏí‚Ì•¶Žš } + retk_CharClass, {'[]'‚ň͂܂ꂽƒLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX³‹K•\Œ»‚Ì’†‚Å + '-'‚ðŽg‚Á‚Ĕ͈͎w’肳‚ꂽ•¨ } + retk_Union, { '|'} + retk_LPar, { '('} + retk_RPar, { ')'} + retk_Star, { '*'} + retk_Plus, { '+'} + retk_QMark, { '?'} + retk_LBra, { '['} + retk_LBraNeg, { '[O'} + retk_RBra, { ']'} + retk_Dot, { '.'} + retk_LHead, { '^'} + retk_LTail, { '$'} + retk_End); { •¶Žš—ñ‚̏I‚í‚è } + + { REToken_t‚̏W‡W‡Œ^} + RETokenSet_t = set of REToken_t; + + RESymbol_t = record + case REToken_t of + retk_CharClass: (CharClass: RECharClass_t); + retk_Char: (WChar: WChar_t); + end; + +{œ •¶Žš—ñ‚©‚çƒg[ƒNƒ“‚ðØ‚èo‚·ƒNƒ‰ƒX} + TREScanner = class + private + FRegExpStr: String; + FIndex: Integer; + FToken: REToken_t; + FSymbol: RESymbol_t; + FInCharClass: Boolean; + protected + procedure SetRegExpStr(RegExpStr: String); + + {ŽŸ‚̃g[ƒNƒ“‚𓾂éB} + function GetTokenStd: REToken_t; virtual; + {ƒLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX³‹K•\Œ» "[ ]" ‚Ì’†‚̃g[ƒNƒ“‚𓾂éB} + function GetTokenCC: REToken_t; virtual; + public + constructor Create(Str: String); + + function GetToken: REToken_t; + + {Œ»Ý‚̃g[ƒNƒ“} + property Token: REToken_t read FToken; + + { Token‚ɑΉž‚·‚镶Žš[—ñ](Lexeme) + Token <> retk_CharClass ‚Ì‚Æ‚« Œ»Ý‚̃g[ƒNƒ“‚Ì•¶Žš’l 
WChar_tŒ^ + Token = retk_CharClass ‚Ì‚Æ‚«‚ÍRECharClass_tƒŒƒR[ƒhŒ^ + ¦FToken = retk_LBraNeg‚ÌŽž‚̓uƒ‰ƒPƒbƒg'['‚P•¶Žš•ª‚µ‚©‚È‚¢B} + property Symbol: RESymbol_t read FSymbol; + + {ˆ—‘Ώۂ̕¶Žš—ñ} + property RegExpStr: String read FRegExpStr write SetRegExpStr; + + {ƒCƒ“ƒfƒbƒNƒX + InputStr•¶Žš—ñ’†‚ÅŽŸ‚ÌGetWCharƒƒ\ƒbƒh‚ŏˆ—‚·‚镶Žš‚̃Cƒ“ƒfƒbƒNƒX + ¦ Symbol‚ÌŽŸ‚Ì•¶Žš‚ðŽw‚µ‚Ä‚¢‚邱‚Æ‚É’ˆÓ} + property Index: Integer read FIndex; + end; + +{-============================= ====================-} + {ƒg[ƒNƒ“‚̏î•ñ‚ð‚ЂƂ܂Ƃ߂ɂµ‚½‚à‚Ì} + RETokenInfo_t = record + Token: REToken_t; + Symbol: RESymbol_t; + FromIndex: Integer; + ToIndex: Integer; + end; + + REpTokenInfo_t = ^RETokenInfo_t; + + {TREPreProcessorƒNƒ‰ƒX“à•”‚ÅŽg—p} + TREPreProcessorFindFunc = function(FromTokenIndex, ToTokenIndex: Integer): Integer of object; + + TREPreProcessor = class + private + FScanner: TREScanner; + FProcessedRegExpStr: String; + FListOfSynonymDic: TList; + FListOfFuzzyCharDic: TList; + FTokenList: TList; + FSynonymStr: String; + + FUseFuzzyCharDic: Boolean; + FUseSynonymDic: Boolean; + protected + procedure MakeTokenList; + procedure DestroyTokenListItems; + + function ReferToOneList(FromTokenIndex, ToTokenIndex: Integer; SynonymDic: TList): Integer; + function FindSynonym(FromTokenIndex, ToTokenIndex: Integer): Integer; + function FindFuzzyWChar(FromTokenIndex, ToTokenIndex: Integer): Integer; + + procedure Process(FindFunc: TREPreProcessorFindFunc); + + function GetTargetRegExpStr: String; + procedure SetTargetRegExpStr(Str: String); + public + constructor Create(Str: String); + destructor Destroy; override; + procedure Run; + + property TargetRegExpStr: String read GetTargetRegExpStr write SetTargetRegExpStr; + property ProcessedRegExpStr: String read FProcessedRegExpStr; + + property UseSynonymDic: Boolean read FUseSynonymDic write FUseSynonymDic; + property ListOfSynonymDic: TList read FListOfSynonymDic; + property UseFuzzyCharDic: Boolean read FUseFuzzyCharDic write FUseFuzzyCharDic; + property 
ListOfFuzzyCharDic: TList read FListOfFuzzyCharDic; + end; + +{-=========================== TREParseTree Class ===============================-} +{************************************************************************** +œ \•¶–Ø‚ðŠÇ—‚·‚éƒNƒ‰ƒX TREParseTree + +“Á’¥F ’†ŠÔß(Internal node)‚Æ—t(Leaf)‚ðì‚é‚Æ‚«‚́A‚»‚ꂼ‚êMakeInternalNode + ƒƒ\ƒbƒh‚ÆMakeLeafƒƒ\ƒbƒh‚ðŽg‚¤B + ‚Ü‚½A\•¶–؂Ƃ͕ʂɁAFNodeList‚ÆFLeafList‚©‚ç’†ŠÔß‚Æ—t‚ÖƒŠƒ“ƒN‚µ‚Ä + ‚¨‚­‚±‚Æ‚É‚æ‚èA“r’†‚ŃGƒ‰[‚ª”­¶‚µ‚Ä‚à•K‚¸ƒƒ‚ƒŠ‚ðŠJ•ú‚·‚éB +**************************************************************************} + { TREParseTree‚̐߂̎í—Þ‚ð•\‚·Œ^} + REOperation_t = (reop_Char, { •¶Žš‚»‚Ì‚à‚Ì } + reop_LHead, { •¶“ª } + reop_LTail, { •¶”ö } + reop_Concat, { XY } + reop_Union, { X|Y} + reop_Closure, { X* } + reop_Empty); { ‹ó } + + { RENode_t‚ւ̃|ƒCƒ“ƒ^Œ^} + REpNode_t = ^RENode_t; + + { TREParseTree‚ÌŽqß‚ւ̃|ƒCƒ“ƒ^Œ^} + REChildren_t = record + pLeft: REpNode_t; + pRight: REpNode_t; + end; + + { TREParseTree‚̐ß} + RENode_t = record + Op: REOperation_t; + case Char of + #0: (CharClass: RECharClass_t); + #1: (Children: REChildren_t); + end; + +{œ \•¶–Ø‚ðŠÇ—‚·‚éƒNƒ‰ƒX} + TREParseTree = class + private + FpHeadNode: REpNode_t;{\•¶–Ø‚Ì’¸“_‚É‚ ‚éß} + FNodeList: TList; {’†ŠÔß‚̃ŠƒXƒgB} + FLeafList: TList; {—t‚̃ŠƒXƒgB} + public + constructor Create; + destructor Destroy; override; + + {\•¶–Ø‚Ì“à•”ß‚ðì¬B + op ‚̓m[ƒh‚ª•\‚·‰‰ŽZAleft‚͍¶‚ÌŽqAright‚͉E‚ÌŽq } + function MakeInternalNode(TheOp: REOperation_t; pLeft, pRight: REpNode_t): REpNode_t; + + {\•¶–Ø‚Ì—t‚ðì¬B + aStartChar, aEndChar ‚ŃLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX‚ð•\‚·} + function MakeLeaf(aStartChar, aEndChar: WChar_t): REpNode_t; + + {”CˆÓ‚̈ꕶŽš‚ð•\‚·'.'ƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ɑΉž‚·‚é•”•ª–Ø‚ðì‚éB + ¦CR LF‚ðœ‚­‘S‚ẴLƒƒƒ‰ƒNƒ^‚ð•\‚·—t‚ðreop_Union‘€ì‚ð•\‚·’†ŠÔß‚ÅŒ‹‚ñ‚¾‚à‚Ì} + function MakeAnyCharsNode: REpNode_t; virtual; + + {•¶“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ð•\‚·—t‚ðì¬ + ¦ —t‚ð•Ô‚·‚ªAMakeInternalNode‚ðŽg‚¤B} + function MakeLHeadNode(WChar: WChar_t): REpNode_t; + + 
{•¶”öƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ð•\‚·—t‚ðì¬ + ¦ —t‚ð•Ô‚·‚ªAMakeInternalNode‚ðŽg‚¤B} + function MakeLTailNode(WChar: WChar_t): REpNode_t; + + {ˆø”‚ª aStartChar <= aEndChar ‚ÌŠÖŒW‚ð–ž‚½‚µ‚Ä‚¢‚é‚Æ‚«‚ɁAMakeLeaf‚ðŒÄ‚Ô + ‚»‚êˆÈŠO‚́Anil ‚ð•Ô‚·B} + function Check_and_MakeLeaf(aStartChar, aEndChar: WChar_t):REpNode_t; + + {—t‚ð“à•”ß‚É•Ï‚¦‚éB} + procedure ChangeLeaftoNode(pLeaf, pLeft, pRight: REpNode_t); + + {‘S‚Ä‚Ì—t‚ªŽ‚ƒLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX‚͈̔͂ª‚»‚ꂼ‚êd•¡‚µ‚È‚¢‚悤‚É•ªŠ„‚·‚éB} + procedure ForceCharClassUnique; + + {‚·‚ׂĂ̐߁i“à•”ßA—tj‚ðíœB} + procedure DisposeTree; + + {\•¶–Ø‚Ì’¸“_‚É‚ ‚éß} + property pHeadNode: REpNode_t read FpHeadNode write FpHeadNode; + + {“à•”ß‚̃ŠƒXƒg} + property NodeList: TList read FNodeList; + {—t‚̃ŠƒXƒg} + property LeafList: TList read FLeafList; + end; + +{-=========================== TREParser Class ===============================-} +{œ ³‹K•\Œ»•¶Žš—ñ‚ð‰ðÍ‚µ‚č\•¶–Ø‚É‚·‚éƒp[ƒT[ } + TREParser = class + private + FParseTree: TREParseTree; {ƒ†ƒjƒbƒgParseTre.pas ‚Å’è‹`‚³‚ê‚Ä‚¢‚é\•¶–؃Nƒ‰ƒX} + FScanner: TREScanner; {ƒg[ƒNƒ“ŠÇ—ƒNƒ‰ƒX} + + protected + { ‚ðƒp[ƒX‚µ‚āA“¾‚ç‚ꂽ\•¶–Ø‚ð•Ô‚·B + ‘I‘ð X|Y ‚ð‰ðÍ‚·‚é} + function Regexp: REpNode_t; + + { ‚ðƒp[ƒX‚µ‚āA“¾‚ç‚ꂽ\•¶–Ø‚ð•Ô‚·B + ˜AŒ‹‚w‚x‚ð‰ðÍ‚·‚é} + function term: REpNode_t; + + { ‚ðƒp[ƒX‚µ‚āA“¾‚ç‚ꂽ\•¶–Ø‚ð•Ô‚·B + ŒJ‚è•Ô‚µX*, X+‚ð‰ðÍ‚·‚é} + function factor: REpNode_t; + + { ‚ðƒp[ƒX‚µ‚āA“¾‚ç‚ꂽ\•¶–Ø‚ð•Ô‚·B + •¶Žš‚»‚Ì‚à‚̂ƁAŠ‡ŒÊ‚ÅŠ‡‚ç‚ꂽ³‹K•\Œ» (X) ‚ð‰ðÍ‚·‚é} + function primary: REpNode_t; + + { ‚ðƒp[ƒX‚µ‚āA“¾‚ç‚ꂽ\•¶–Ø‚ð•Ô‚·B + [ abcd] ‚ÅŠ‡‚ç‚ꂽ³‹K•\Œ»‚ð‰ðÍ‚·‚é} + function CharacterClass(aParseTree: TREParseTree): REpNode_t; + + { ‚ðƒp[ƒX‚µ‚āA“¾‚ç‚ꂽ\•¶–Ø‚ð•Ô‚·B + [^abcd] ‚ÅŠ‡‚ç‚ꂽ³‹K•\Œ»‚ð‰ðÍ‚·‚é} + function NegativeCharacterClass: REpNode_t; + + public + constructor Create(RegExpStr: String); + destructor Destroy; override; + + {³‹K•\Œ»‚ðƒp[ƒX‚·‚éB + regexp, term, factor, primary, charclass ‚ÌŠeƒƒ\ƒbƒh‚ðŽg‚¢Ä‹A‰º~–@ + ‚É‚æ‚Á‚ĉðÍ‚·‚éB} + procedure Run; + + 
{\•¶–Ø‚ðŠÇ—‚·‚éƒIƒuƒWƒFƒNƒg} + property ParseTree: TREParseTree read FParseTree; + + {“ü—Í•¶Žš—ñ‚©‚çƒg[ƒNƒ“‚ðØ‚èo‚·ƒIƒuƒWƒFƒNƒg} + property Scanner: TREScanner read FScanner; + +{$IFDEF DEBUG} + {ƒAƒEƒgƒ‰ƒCƒ“EƒRƒ“ƒgƒ[ƒ‹‚ɍ\•¶–؂̐}‚ð‘‚«o‚·ƒƒ\ƒbƒh} + procedure WriteParseTreeToOutLine(anOutLine: TOutLine); +{$ENDIF} + end; + +{$IFDEF DEBUG} + function DebugWCharToStr(WChar: WChar_t): String; +{$ENDIF} + +{ -============================== TRE_NFA Class ==================================-} +type + RE_pNFANode_t = ^RE_NFANode_t; + + { NFAó‘Ô•\‚Ìß + RE_NFANode_t ‚Í 1‚‚̂m‚e‚`ó‘Ô‚ªAƒLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX(CharClass)“à‚Ì•¶Žš‚É‚æ‚Á + ‚Ä‘JˆÚ‚·‚é‚m‚e‚`ó‘Ԃ̏ó‘Ԕԍ†(TransitTo)‚ðŠi”[‚·‚éB + ‚P‚‚̂m‚e‚`ó‘Ô‚Ö“ü—Í‚³‚ê‚éƒLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX–ˆ‚ɃŠƒ“ƒNEƒŠƒXƒg‚ðŒ`¬‚·‚é} + RE_NFANode_t = record + CharClass: RECharClass_t;{ “ü—Í : CharClass.StartChar ` CharClass.EndChar} + TransitTo: integer; { ‘JˆÚæF FStateList‚̃Cƒ“ƒfƒbƒNƒX} + + Next: RE_pNFANode_t; { ƒŠƒ“ƒNƒŠƒXƒg‚ÌŽŸß} + end; + +{œ \•¶–Ø‚ð‰ðÍ‚µ‚ÄNFAó‘Ô•\‚ðì‚éƒNƒ‰ƒX} + TRE_NFA = class + private + FStateList: TList; + FEntryState: Integer; + FExitState: Integer; + FParser: TREParser; + FRegExpHasLHead: Boolean; + FRegExpHasLTail: Boolean; + FLHeadWChar: WChar_t; + FLTailWChar: WChar_t; + protected + { ƒm[ƒh‚ɔԍ†‚ðŠ„‚è“–‚Ä‚é} + function NumberNode: Integer; + + { NFAó‘Ôß ‚ð‚P‚쐬} + function MakeNFANode: RE_pNFANode_t; + + { FStateList‚ɏó‘Ô‘JˆÚ‚ð’ljÁ‚·‚éB + ó‘Ô TransFrom ‚ɑ΂µ‚āAChrClass‚Ì‚Æ‚«‚ɏó‘Ô TransTo ‚Ö‚Ì‘JˆÚ‚ð’ljÁ‚·‚éB} + procedure AddTransition(TransFrom, TransTo: Integer; aCharClass: RECharClass_t); + + { \•¶–Ø pTree ‚ɑ΂·‚é StateList‚𐶐¬‚·‚é + NFA‚Ì“ü‚èŒû‚ðentry, oŒû‚ðway_out‚Æ‚·‚é } + procedure GenerateStateList(pTree: REpNode_t; entry, way_out: Integer); + + { NFAó‘Ô•\‚ð”jŠü‚·‚é} + procedure DisposeStateList; + + public + constructor Create(Parser: TREParser; LHeadWChar, LTailWChar: WChar_t); + destructor Destroy;override; + + { \•¶–Ø Tree‚ɑΉž‚·‚éNFA‚𐶐¬‚·‚é} + procedure Run; + + {NFA ó‘Ԃ̃ŠƒXƒg} + property 
StateList: TList read FStateList; + + {NFA‚̏‰Šúó‘Ô‚ÌFStateList‚̃Cƒ“ƒfƒbƒNƒX} + property EntryState: Integer read FEntryState; + {NFA‚̏I—¹ó‘Ô‚ÌFStateList‚̃Cƒ“ƒfƒbƒNƒX} + property ExitState: Integer read FExitState; + + {³‹K•\Œ»‚ªA•¶“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Þ‚©} + property RegExpHasLHead: Boolean read FRegExpHasLHead; + {³‹K•\Œ»‚ªA•¶”öƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Þ‚©} + property RegExpHasLTail: Boolean read FRegExpHasLTail; + + {•¶“ª‚ð•\‚·ƒƒ^ƒLƒƒƒ‰ƒNƒ^ '^'‚É—^‚¦‚郆ƒj[ƒN‚ȃLƒƒƒ‰ƒNƒ^ƒR[ƒh} + property LHeadWChar: WChar_t read FLHeadWChar write FLHeadWChar; + {•¶”ö‚ð•\‚·ƒƒ^ƒLƒƒƒ‰ƒNƒ^ '$'‚É—^‚¦‚郆ƒj[ƒN‚ȃLƒƒƒ‰ƒNƒ^ƒR[ƒh} + property LTailWChar: WChar_t read FLTailWChar write FLTailWChar; + +{$IFDEF DEBUG} + {TStringsƒIƒuƒWƒFƒNƒg‚ɁANFA ‚Ì“à—e‚ð‘‚«ž‚Þ} + procedure WriteNFAtoStrings(Strings: TStrings); +{$ENDIF} + end; + +{ -========================== TRE_NFAStateSet Class =============================-} +{œ NFA‚̏ó‘ԏW‡‚ð•\‚·ƒIƒuƒWƒFƒNƒg + “à•”‚ł̓rƒbƒgƒxƒNƒ^‚ŏó‘ԏW‡‚ðŽÀŒ»‚µ‚Ä‚¢‚éB} + TRE_NFAStateSet = class + private + FpArray: PByteArray; + FCapacity: Integer; + public + {ƒRƒ“ƒXƒgƒ‰ƒNƒ^‚ɂ́AÅ‘åó‘Ԑ”‚ðŽw’è‚·‚éB} + constructor Create(StateMax: Integer); + destructor Destroy; override; + + {ƒIƒuƒWƒFƒNƒg‚̏W‡‚ªAStateIndex‚ðŠÜ‚Þ‚©H} + function Has(StateIndex: Integer): Boolean; + {ƒIƒuƒWƒFƒNƒg‚̏W‡‚ªAAStateSet‚Æ“¯‚¶W‡ó‘Ô‚©H} + function Equals(AStateSet: TRE_NFAStateSet): Boolean; + {ƒIƒuƒWƒFƒNƒg‚̏W‡‚ÉStateIndex‚ðŠÜ‚ß‚éB} + procedure Include(StateIndex: Integer); + {ƒIƒuƒWƒFƒNƒg‚ªŽ‚ƒoƒCƒg”z—ñ‚ւ̃|ƒCƒ“ƒ^} + property pArray: PByteArray read FpArray; + {ƒIƒuƒWƒFƒNƒg‚ªŽ‚ƒoƒCƒg”z—ñ‚Ì—v‘f”} + property Capacity: Integer read FCapacity; + end; + +{ -============================= TRE_DFA Class ==================================-} +{œ TRE_DFA NFAó‘Ô•\‚©‚çDFAó‘Ô•\‚ðì‚éƒNƒ‰ƒX + ƒRƒ“ƒXƒgƒ‰ƒNƒ^ Create ‚ɁA³‹K•\Œ»‚ð•\‚·‚m‚e‚`(”ñŒˆ’萫—LŒÀƒI[ƒgƒ}ƒgƒ“ + Non-deterministic Finite Automaton)‚̏ó‘Ô•\‚ðŽ‚ÂTRE_NFA‚ðŽó‚¯Žæ‚èA + ‘Ήž‚·‚é‚c‚e‚`(Œˆ’萫—LŒÀƒI[ƒgƒ}ƒgƒ“Deterministic Finite 
Automaton) + ‚̏ó‘ÔƒŠƒXƒgƒIƒuƒWƒFƒNƒg‚ð\’z‚·‚éTRE_DFAƒNƒ‰ƒXB} + + RE_pDFATransNode_t = ^RE_DFATransNode_t; + + {TRE_DFA‚̃ƒ\ƒbƒhCompute_Reachable_N_state(DState: PD_state_t): RE_pDFATransNode_t; + ‚ª‚±‚ÌŒ^‚Ì’l‚ð•Ô‚·B + ƒLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX(CharClass)‚Å‘JˆÚ‰Â”\‚È‚m‚e‚`ó‘ԏW‡(ToNFAStateSet)} + RE_DFATransNode_t = record + CharClass: RECharClass_t;{Char;} + ToNFAStateSet: TRE_NFAStateSet; + + next: RE_pDFATransNode_t;{ƒŠƒ“ƒNƒŠƒXƒg‚ðŒ`¬} + end; + + RE_pDFAStateSub_t = ^RE_DFAStateSub_t; + RE_pDFAState_t = ^RE_DFAState_t; + + { RE_DFAState_t‚É‚æ‚Á‚ÄŽg—p‚³‚ê‚é + ƒLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX(CharClass)‚É‚æ‚Á‚ÄDFAó‘Ô(TransitTo) ‚Ö‘JˆÚ‚·‚éB} + RE_DFAStateSub_t = record + CharClass: RECharClass_t; + TransitTo: RE_pDFAState_t; {CharClass”͈͓à‚Ì•¶Žš‚Å DFA ó‘Ô TransitTo‚Ö} + + next: RE_pDFAStateSub_t; {ƒŠƒ“ƒNƒŠƒXƒg‚ÌŽŸ‚̃f[ƒ^} + end; + + { RE_DFAState_t‚Í‚c‚e‚`ó‘Ô‚ð•\‚·Œ^} + RE_DFAState_t = record + StateSet: TRE_NFAStateSet; {‚±‚ÌDFAó‘Ô‚ð•\‚·NFAó‘ԏW‡} + Visited: wordbool; { ˆ—Ï‚Ý‚È‚ç‚P} + Accepted: wordbool;{ StateSetƒtƒB[ƒ‹ƒh‚ªNFA‚̏I—¹ó‘Ô‚ðŠÜ‚Þ‚È‚ç‚P} + Next: RE_pDFAStateSub_t; { ƒLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX–ˆ‚Ì‘JˆÚæ‚̃Šƒ“ƒNƒŠƒXƒg} + end; + +{ œ NFAó‘Ô•\‚©‚çDFAó‘Ô•\‚ðì‚éƒNƒ‰ƒX} + TRE_DFA = class + private + FStateList: TList; + FpInitialState: RE_pDFAState_t; + FNFA: TRE_NFA; + + FRegExpIsSimple: Boolean; + FSimpleRegExpStr: String; + FRegExpHasLHead: Boolean; + FRegExpHasLTail: Boolean; + protected + { NFAó‘ԏW‡ StateSet ‚ɑ΂µ‚Ä ƒÃ-closure‘€ì‚ðŽÀs‚·‚éB + ƒÃ‘JˆÚ‚Å‘JˆÚ‰Â”\‚È‘S‚Ä‚Ì‚m‚e‚`ó‘Ô‚ð’ljÁ‚·‚é} + procedure Collect_Empty_Transition(StateSet: TRE_NFAStateSet); + + { NFAó‘ԏW‡ aStateSet ‚ð‚c‚e‚`‚É“o˜^‚µ‚āA‚c‚e‚`ó‘Ԃւ̃|ƒCƒ“ƒ^‚ð•Ô‚·B + aStateSet‚ªI—¹ó‘Ô‚ðŠÜ‚ñ‚Å‚¢‚ê‚΁Aacceptedƒtƒ‰ƒO‚ðƒZƒbƒg‚·‚éB + ‚·‚Å‚ÉaStateSet‚ª‚c‚e‚`‚É“o˜^‚³‚ê‚Ä‚¢‚½‚牽‚à‚µ‚È‚¢} + function Register_DFA_State(var aStateSet: TRE_NFAStateSet): RE_pDFAState_t; + + { ˆ—Ï‚݂̈󂪂‚¢‚Ä‚¢‚È‚¢‚c‚e‚`ó‘Ô‚ð’T‚·B + Œ©‚‚©‚ç‚È‚¯‚ê‚Înil‚ð•Ô‚·B} + function Fetch_Unvisited_D_state: RE_pDFAState_t; + + { 
DFAó‘ÔpDFAState‚©‚ç‘JˆÚ‰Â”\‚ÈNFAó‘Ô‚ð’T‚µ‚āAƒŠƒXƒg‚É‚µ‚Ä•Ô‚·} + function Compute_Reachable_N_state(pDFAState: RE_pDFAState_t): RE_pDFATransNode_t; + + { Compute_Reachable_N_stateƒƒ\ƒbƒh‚©ì‚é RE_DFATransNode_tŒ^‚̃Šƒ“ƒNƒŠƒXƒg‚ð + ”pŠü‚·‚é} + procedure Destroy_DFA_TransList(pDFA_TransNode: RE_pDFATransNode_t); + + { NFA‚𓙉¿‚È‚c‚e‚`‚Ö‚Æ•ÏŠ·‚·‚é} + procedure Convert_NFA_to_DFA; + + { StateList‚ÌŠeƒŠƒ“ƒNƒŠƒXƒg‚ðƒ\[ƒg‚·‚é} + procedure StateListSort; + + procedure CheckIfRegExpIsSimple; + procedure DestroyStateList; + public + constructor Create(NFA: TRE_NFA); + destructor Destroy; override; + + procedure Run; + + property StateList: TList read FStateList; + + property pInitialState: RE_pDFAState_t read FpInitialState; + + {³‹K•\Œ»‚ª’Pƒ‚È•¶Žš—ñ‚©H} + property RegExpIsSimple: Boolean read FRegExpIsSimple; + {³‹K•\Œ»‚Æ“™‰¿‚È’Pƒ‚È•¶Žš—ñ} + property SimpleRegExpStr: String read FSimpleRegExpStr; + + {³‹K•\Œ»‚ªA•¶“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Þ‚©} + property RegExpHasLHead: Boolean read FRegExpHasLHead; + {³‹K•\Œ»‚ªA•¶”öƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Þ‚©} + property RegExpHasLTail: Boolean read FRegExpHasLTail; + {$IFDEF DEBUG} + {TStringsƒIƒuƒWƒFƒNƒg‚ɁADFA ‚Ì“à—e‚ð‘‚«ž‚Þ} + procedure WriteDFAtoStrings(Strings: TStrings); +{$ENDIF} + end; + +{ -=================== TRegularExpression Class ==============================-} + {TStringList ‚ÉŠi”[‚Å‚«‚鍀–ڐ”‚͈̔͌^} + RE_IndexRange_t = 1..Classes.MaxListSize; + +{œ ³‹K•\Œ»•¶Žš—ñ‚©‚ç‚c‚e‚`ó‘Ô•\‚ðì‚éƒNƒ‰ƒX} + TRegularExpression = class(TComponent) + private + protected + FLineHeadWChar: WChar_t; + FLineTailWChar: WChar_t; + {ƒvƒŠƒvƒƒZƒbƒT‚ð’Ê‚é‘O‚̐³‹K•\Œ»} + FRegExp: String; + {³‹K•\Œ»‚Ì•¶Žš—ñƒŠƒXƒgBObjectsƒvƒƒpƒeƒB‚ÉT‚c‚e‚`ƒIƒuƒWƒFƒNƒg‚ðŽ‚Â} + FRegExpList: TStringList; + {FRegExpList‚ÉŠi”[‚·‚鍀–ڐ”‚̍őå’lB ƒfƒtƒHƒ‹ƒg 30} + FRegExpListMax: RE_IndexRange_t; + {Œ»ÝŽw’肳‚ê‚Ä‚¢‚鐳‹K•\Œ» RegExp‚̐³‹K•\Œ»•¶Žš—ñƒŠƒXƒgRegExpList’†‚Å‚Ì + ƒCƒ“ƒfƒbƒNƒX + ¦ FRegExpList[FCurrentIndex] = RegExp} + FCurrentIndex: Integer; + {“¯ˆÓŒêˆ—ƒvƒŠƒvƒƒZƒbƒT} + 
FPreProcessor: TREPreProcessor; + + { “à•”Žg—p‚Ì‚½‚߂̎葱‚«EŠÖ”} + {***** ³‹K•\Œ»•¶Žš—ñ¨\•¶–؍\‘¢¨NFA¨DFA ‚Ì•ÏŠ·‚ðs‚¤ *****} + procedure Translate(RegExpStr: String); virtual; + + {³‹K•\Œ»ƒŠƒXƒg(RegExpList: TStringList)‚ÆObjectsƒvƒƒpƒeƒB‚ÉŒ‹‚Ñ•t‚¯‚ç‚ꂽ + TRE_DFAƒIƒuƒWƒFƒNƒg‚ð”jŠü} + procedure DisposeRegExpList; + + {ƒvƒƒpƒeƒBEƒAƒNƒZƒXEƒƒ\ƒbƒh} + procedure SetRegExp(Str: String); virtual; + function GetProcessedRegExp: String; + function GetListOfFuzzyCharDic: TList; + function GetListOfSynonymDic: TList; + function GetRegExpIsSimple: Boolean; + function GetSimpleRegExp: String; + function GetHasLHead: Boolean; + function GetHasLTail: Boolean; + function GetUseFuzzyCharDic: Boolean; + procedure SetUseFuzzyCharDic(Val: Boolean); + function GetUseSynonymDic: Boolean; + procedure SetUseSynonymDic(Val: Boolean); + function GetLineHeadWChar: WChar_t; virtual; + function GetLineTailWChar: WChar_t; virtual; + {DFAƒIƒuƒWƒFƒNƒgŠÖ˜Aƒƒ\ƒbƒh} + {Œ»ÝŽw’肳‚ê‚Ä‚¢‚鐳‹K•\Œ»‚ɑΉž‚·‚é‚c‚e‚`ó‘Ô•\‚̏‰Šúó‘Ԃւ̃|ƒCƒ“ƒ^‚ð•Ô‚·} + function GetpInitialDFAState: RE_pDFAState_t; + {Œ»ÝŽw’肳‚ê‚Ä‚¢‚鐳‹K•\Œ»‚ɑΉž‚·‚éTRE_DFAƒIƒuƒWƒFƒNƒg‚ð•Ô‚·} + function GetCurrentDFA: TRE_DFA; + {ó‘Ô DFAstate‚©‚當Žš‚ƒ‚É‚æ‚Á‚Ä‘JˆÚ‚µ‚āA‘JˆÚŒã‚̏ó‘Ô‚ð•Ô‚·B + •¶Žš‚ƒ‚É‚æ‚Á‚Ä‘JˆÚo—ˆ‚È‚¯‚ê‚Înil‚ð•Ô‚·} + function NextDFAState(DFAState: RE_pDFAState_t; c: WChar_t): RE_pDFAState_t; + {DFAó‘Ô•\‚Ì’†‚Å•¶“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ð•\‚·ƒLƒƒƒ‰ƒNƒ^ƒR[ƒh} + property LineHeadWChar: WChar_t read GetLineHeadWChar; + {DFAó‘Ô•\‚Ì’†‚Å•¶”öƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ð•\‚·ƒLƒƒƒ‰ƒNƒ^ƒR[ƒh} + property LineTailWChar: WChar_t read GetLineTailWChar; + + {³‹K•\Œ»ŠÖ˜AƒvƒƒpƒeƒB} + {Œ»ÝŽw’肳‚ê‚Ä‚¢‚鐳‹K•\Œ»} + property RegExp: String read FRegExp write SetRegExp; + + {Œ»ÝŽw’肳‚ê‚Ä‚¢‚鐳‹K•\Œ»‚É“¯ˆÓŒêˆ—‚ðŽ{‚µ‚½‚à‚Ì} + property ProcessedRegExp: String read GetProcessedRegExp; + + {³‹K•\Œ»‚ª’Pƒ‚È•¶Žš—ñ‚©H} + property RegExpIsSimple: Boolean read GetRegExpIsSimple; + {³‹K•\Œ»‚Æ“™‰¿‚È’Pƒ‚È•¶Žš—ñ(¦RegExpIsSimple=False‚ÌŽž‚̓kƒ‹•¶Žš—ñ)} + property SimpleRegExp: 
String read GetSimpleRegExp; + + {³‹K•\Œ»‚ªA•¶“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Þ‚©} + property HasLHead: Boolean read GetHasLHead; + {³‹K•\Œ»‚ªA•¶”öƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Þ‚©} + property HasLTail: Boolean read GetHasLTail; + + {Ž«‘ŠÖ˜AƒvƒƒpƒeƒB} + {•¶Žš“¯ˆêŽ‹Ž«‘‚ðŽg‚¤^Žg‚í‚È‚¢Žw’è} + property UseFuzzyCharDic: Boolean read GetUseFuzzyCharDic write SetUseFuzzyCharDic; + {•¶Žš‚Ì“¯ˆêŽ‹Ž«‘‚̃ŠƒXƒg} + property ListOfFuzzyCharDic: TList read GetListOfFuzzyCharDic; + + {“¯ˆÓŒêŽ«‘‚ðŽg‚¤^Žg‚í‚È‚¢Žw’è} + property UseSynonymDic: Boolean read GetUseSynonymDic write SetUseSynonymDic; + {“¯ˆÓŒêŽ«‘‚̃ŠƒXƒg} + property ListOfSynonymDic: TList read GetListOfSynonymDic; + public + constructor Create(AOwner: TComponent); override; + destructor Destroy; override; + end; + +{ -========================== TAWKStr Class ==================================-} + TMatchCORE_LineSeparator = (mcls_CRLF, mcls_LF); + + TMatchCORE = class(TRegularExpression) + private + FLineSeparator: TMatchCORE_LineSeparator; + protected + function IsLineEnd(WChar: WChar_t): Boolean; + property LineSeparator: TMatchCORE_LineSeparator + read FLineSeparator write FLineSeparator; + protected + + {à–¾ F ƒ}ƒbƒ` + (³‹K•\Œ»‚ªs“ª^s––ƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Ü‚È‚¢‚Æ‚«—p) + “®ì F + ˆø” F pText: PChar ƒkƒ‹ƒLƒƒƒ‰ƒNƒ^‚ŏI‚í‚錟õ‘Ώە¶Žš—ñ‚ւ̃|ƒCƒ“ƒ^ + •›ì—pF pStart:PChar ƒ}ƒbƒ`‚µ‚½•”•ª‚̐擪•¶Žš‚ւ̃|ƒCƒ“ƒ^ + pEnd :PChar ƒ}ƒbƒ`‚µ‚½•”•ª‚ÌŽŸ‚Ì•¶Žš‚ւ̃|ƒCƒ“ƒ^ + ’ˆÓ F ƒ}ƒbƒ`‚µ‚½•”•ª‚̃oƒCƒg”‚́ApEnd - pStart‚Å“¾‚ç‚ê‚éB} + procedure MatchStd(pText: PChar; var pStart, pEnd: PChar); + + + {à–¾ F ƒ}ƒbƒ`(³‹K•\Œ»‚ªs“ª^s––ƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Þ‚Æ‚«—p) + “®ì F + ˆø” F pText: PChar ƒkƒ‹ƒLƒƒƒ‰ƒNƒ^‚ŏI‚í‚錟õ‘Ώە¶Žš—ñ‚ւ̃|ƒCƒ“ƒ^ + •›ì—pF pStart:PChar ƒ}ƒbƒ`‚µ‚½•”•ª‚̐擪•¶Žš‚ւ̃|ƒCƒ“ƒ^ + pEnd :PChar ƒ}ƒbƒ`‚µ‚½•”•ª‚ÌŽŸ‚Ì•¶Žš‚ւ̃|ƒCƒ“ƒ^ + ’ˆÓ F ƒ}ƒbƒ`‚µ‚½•”•ª‚̃oƒCƒg”‚́ApEnd - pStart‚Å“¾‚ç‚ê‚éB} + procedure MatchEX(pText: PChar; var pStart, pEnd: PChar); + + {à–¾ F ƒ}ƒbƒ`(“à•”ˆ——pB³‹K•\Œ»‚ªs“ª^s––ƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚Þ‚Æ‚«—p) + “®ì F 
Unlike the MatchEx_Head method, this method treats the argument pText as
     pointing into the middle of a line, so the line-head metacharacter
     never matches.
     Argument   : pText: PChar   pointer to the null-terminated search target
                                 (handled as pointing inside a line)
     Side effect: pStart: PChar  pointer to the first character of the match
                  pEnd  : PChar  pointer to the character following the match
     Note       : the byte length of the match is pEnd - pStart.}
    procedure MatchEX_Inside(pText: PChar; var pStart, pEnd: PChar);

{---------------- matching workers -------------}
{MatchHead and MatchInside test whether a match starts at the character that
 pText points to.}

    {Description: pText is regarded as pointing at the head of a line, so the
                  character at pText can match the line-head metacharacter.
                  The line-tail metacharacter is taken into account.
     Arguments  : pText: PChar  search target (points at the first character
                                of a line)
                  pDFAState     the DFA state used as the initial state
     Returns    : the character following the matched substring; the byte
                  length of the match is result - pText.}
    function MatchHead(pText: PChar; pDFAState: RE_pDFAState_t): PChar;

    {Description: pText is regarded as pointing inside a line (not at its
                  head), so the character at pText never matches the
                  line-head metacharacter.
                  The line-tail metacharacter is taken into account.
     Arguments  : pText: PChar  search target (points at a character inside
                                a line)
                  pDFAState     the DFA state used as the initial state
     Returns    : the character following the matched substring; the byte
                  length of the match is result - pText.}
    function MatchInside(pText: PChar; pDFAState: RE_pDFAState_t): PChar;
  public
    constructor Create(AOwner: TComponent); override;
  end;

{ -========================== TAWKStr Class ==================================-}
  TAWKStrMatchProc = procedure(pText: PChar; var pStart, pEnd: PChar) of object;

{TAWKStr: a class that provides the string functions of the AWK language
 in Delphi.}
  TAWKStr = class(TMatchCORE)
  private
    FMatchProc: TAWKStrMatchProc;
  protected
    procedure SetRegExp(Str: String); override;
    {Used by Sub and GSub: replaces '&' with the matched string.}
    function Substitute_MatchStr_For_ANDChar(Text: String; MatchStr: String): String;
  public
    constructor Create(AOwner: TComponent); override;
    function ProcessEscSeq(Text: String): String;

    {List of fuzzy-character (character equivalence) dictionaries}
    property ListOfFuzzyCharDic;
    {List of synonym dictionaries}
    property ListOfSynonymDic;

    {Whether the regular expression contains the line-head metacharacter}
    property HasLHead;
    {Whether the regular expression contains the line-tail metacharacter}
    property HasLTail;

    property ProcessedRegExp;

    {Character code that stands for the line-head metacharacter in the DFA
     state table}
    property LineHeadWChar;
    {Character code that stands for the line-tail metacharacter in the DFA
     state table}
    property LineTailWChar;

    function Match(Text: String; var RStart, RLength: Integer): Integer;

    function Sub(SubText: String; var Text: String): Boolean;

    function GSub(SubText: String; var Text: String): Integer;

    function Split(Text: String; StrList: TStrings): Integer;
  published
    property RegExp;
    {Line separator setting}
    property LineSeparator;

    {Whether to use the fuzzy-character dictionaries}
    property UseFuzzyCharDic;
    {Whether to use the synonym dictionaries}
    property UseSynonymDic;

  end;

{ -========================== Exception classes ==============================-}
  EEndOfFile = class(EInOutError);

  EFileNotFound = class(EInOutError);

  EGrepCancel = class(Exception);

{ -=========================== TTxtFile Class ================================-}
  {Describes one line of a file as returned by TTxtFile.GetThisLine}
  RE_LineInfo_t = record
    Line: String;
    LineNo: Integer;      {line number}
  end;

{TTxtFile: text file access class}
  TTxtFile = Class
  private
  protected
  public
    FBuffSize: Integer;   {size of the buffer}
    FTailMargin: Integer;
    FpBuff: PChar;        {pointer to the read buffer}

    FFileName: String;    {name of the file being processed (full path)}
    FF: File;             {untyped file variable associated with FFileName}
    FFileOpened: Boolean;

    {The three important pointers into the buffer}
    FpBase: PChar;        {head of the substring currently being searched}
    FpLineBegin: PChar;   {first character of the line that FpBase is in}
    FpForward: PChar;     {character currently being examined}

    FLineNo: Integer;     {current line number}
    FReadCount: Integer;  {number of bytes read by BlockRead}
    FBrokenLine: String;  {first half of a line split at the buffer boundary}

    FpCancelRequest: ^Boolean;
    {Called by IncPBase when FpBase reaches a null character}
    procedure IncPBaseNullChar(Ch: Char);
    {Called by GetChar when FpForward reaches a null character}
    procedure GetCharNullChar(Ch: Char);

    constructor Create(aFileName: String; var CancelRequest: Boolean);
    destructor Destroy; override;
    procedure BuffRead(pBuff: PChar);
    function IncPBase: Char;            {makes FpBase point at the next byte}
    function AdvanceBase: WChar_t;
    function GetChar: Char;
    function GetWChar: WChar_t;
    function GetThisLine: RE_LineInfo_t;{returns the line containing FpBase}
  end;

{ -=========================== TGrep Class ==================================-}

  TGrepOnMatch = procedure (Sender: TObject; LineInfo: RE_LineInfo_t) of Object;

  TGrepGrepProc = procedure (FileName: String) of Object;

{TGrep: regular-expression file search class}
  TGrep = class(TRegularExpression)
  private
    FOnMatch: TGrepOnMatch;
//    FDummyIgnoreCase: Boolean;
    FCancel: Boolean;
    FGrepProc: TGrepGrepProc;
  protected
    procedure SetRegExp(Str: String); override;
    function GetLineHeadWChar: WChar_t; override;
    function GetLineTailWChar: WChar_t; override;
  public
    constructor Create(AOwner: TComponent); override;

    procedure GrepByRegExp(FileName: String);
    procedure GrepByStr(FileName: String);

    {Function: searches the given text file for lines matching the regular
               expression (RegExp property) and calls the OnMatch event
               handler for every line found.

               (The expression in RegExp is inspected; GrepByStr is used for
               a plain string, GrepByRegExp when it contains metacharacters.)
               Nothing is done when no OnMatch handler is assigned.

     Arguments: FileName       text file to search (full path)
                CancelRequest  set to True to stop the search part-way.
               The Grep method calls Application.ProcessMessages internally,
               which is when CancelRequest can be set to True.}

    {Is the regular expression a plain string?}
    property RegExpIsSimple;
    {Plain string equivalent to the regular expression
     (empty string when RegExpIsSimple = False)}
    property SimpleRegExp;

    {Whether the regular expression contains the line-head metacharacter}
    property HasLHead;
    {Whether the regular expression contains the line-tail metacharacter}
    property HasLTail;

    {The RegExp property after synonym processing}
    property ProcessedRegExp;
    {List of fuzzy-character (character equivalence) dictionaries}
    property ListOfFuzzyCharDic;
    {List of synonym dictionaries}
    property ListOfSynonymDic;

    property Grep: TGrepGrepProc read FGrepProc;
  published
    {Regular expression string}
    property RegExp;
    {Whether to use the fuzzy-character dictionaries}
    property UseFuzzyCharDic;
    {Whether to use the synonym dictionaries}
    property UseSynonymDic;

    property OnMatch: TGrepOnMatch read FOnMatch write FOnMatch;

    property Cancel: Boolean read FCancel write FCancel;
  end;



var
  RE_FuzzyCharDic: TList;

procedure Register;

implementation
{************************  Implementation ************************************}

{Creates a parser exception carrying Msg and stores ErrorPosition in ErrorPos.}
constructor ERegExpParser.Create(const Msg: string; ErrorPosition: Word);
begin
  inherited Create(Msg);
  ErrorPos := ErrorPosition;
end;

{ -====================== String helper functions ============================-}

{Description: converts one hexadecimal digit character to its value.
 Argument   : WCh: WChar_t  a single-byte character in [0-9a-fA-F]
 Returns    : 0..15 on success, -1 if WCh is not a hexadecimal digit.}
function HexWCharToInt(WCh: WChar_t): Integer;
begin
  case WCh of
    Ord('0')..Ord('9'):  result := WCh - Ord('0');
    Ord('A')..Ord('F'):  result := WCh - Ord('A')+10;
    Ord('a')..Ord('f'):  result := WCh - Ord('a')+10;
    else                 result := -1;
  end;
end;

{Description: converts one octal digit character to its value.
 Argument   : WCh: WChar_t  a single-byte character in [0-7]
 Returns    : 0..7 on success, -1 if WCh is not an octal digit.}
function OctWCharToInt(WCh: WChar_t): Integer;
begin
  case WCh of
    Ord('0')..Ord('7'):  result := WCh - Ord('0');
    else                 result := -1;
  end;
end;

{Function: fetches one character (possibly a double-byte character) from Str
           at byte position Index and advances Index past it.
 Returns : the character; a double-byte character is returned with its lead
           byte in the high byte and its second byte in the low byte.
           CONST_NULL is returned, and Index is left untouched, when Index is
           outside 1..Length(Str), so Index never exceeds Length(Str)+1.
 Raises  : ERegExpParser when the last byte of Str is a lead byte.}
function GetWChar(const Str: String; var Index: Integer): WChar_t;
begin
  if (Index >= 1) and (Index <= Length(Str)) then begin
    if IsDBCSLeadByte(Byte(Str[Index])) then begin
      {A lead byte at the very end of Str has no second byte}
      if Index = Length(Str) then
        raise ERegExpParser.Create('•s³‚È‚QƒoƒCƒg•¶ŽšƒR[ƒh‚Å‚·B', Index);
      WordRec(result).Hi := Byte(Str[Index]);
      WordRec(result).Lo := Byte(Str[Index+1]);
      Inc(Index, 2);
    end else begin
      result := Byte(Str[Index]);
      Inc(Index);
    end;
  end else begin
    result := CONST_NULL;
  end;
end;

//1997/09/25 FIX: changed so that the unit works without MBUtils.pas
{Function: tells whether ptr points at the second (trail) byte of a
           double-byte character inside the string starting at pText.
 Method  : counts the run of consecutive lead-range bytes that ends
           immediately before ptr; ptr is a trail byte exactly when that
           count is odd.
 Fix     : the previous version stopped its backward scan at pText without
           testing the byte stored there, so it returned False when every
           byte from pText up to ptr-1 was a lead byte (for instance when ptr
           was the second byte of a string that begins with a double-byte
           character). UnGetWChar then stepped back by a single byte into the
           middle of a character.
 Returns : False when ptr = pText.}
function IsTrailByteInStr(pText: PAnsiChar;
                          ptr: PAnsiChar
                          ): Boolean;
var
  p: PAnsiChar;
  LeadCount: Integer;
begin
  Result := false;
  if pText = ptr then Exit;
  LeadCount := 0;
  p := ptr;
  while p <> pText do
  begin
    Dec(p);
    if not IsDBCSLeadByte(Ord(p^)) then Break;
    Inc(LeadCount);
  end;
  Result := Odd(LeadCount);
end;

{Function: moves Index back by one character (one or two bytes) in Str.
           Index is left unchanged when it is already 1 or less.}
procedure UnGetWChar(const Str: String; var Index: Integer);
begin
  if Index <= 1 then
    Exit
  else if (Index > 2) and IsTrailByteInStr(PAnsiChar(Str), PAnsiChar(Str)+Index-2) then
    Dec(Index, 2)
  else
    Dec(Index);
end;

{Function: fetches one character (possibly double-byte) at pText and advances
           pText past it. Returns CONST_NULL without advancing when pText
           points at a null byte.
 NOTE(review): a lead byte directly followed by the terminating null is
           consumed as a two-byte character, which moves pText past the
           terminator - confirm that callers never pass such text.}
function PCharGetWChar(var pText: PChar): WChar_t;
begin
  if Byte(pText^) <> CONST_NULL then begin
    if IsDBCSLeadByte(Byte(pText^)) then begin
      WordRec(result).Hi := Byte(pText^);
      WordRec(result).Lo := Byte((pText+1)^);
      Inc(pText, 2);
    end else begin
      result := Byte(pText^);
      Inc(pText);
    end;
  end else begin
    result := CONST_NULL;
  end;
end;

{Function: converts a WChar_t value to a String of one or two bytes.}
function WCharToStr(WCh: WChar_t): String;
begin
  if IsDBCSLeadByte(Hi(WCh)) then
    result := Chr(Hi(WCh))+Chr(Lo(WCh))
  else
    result := Chr(Lo(WCh));
end;

{Function: returns the character denoted by a '\' escape (\b \r \n \t \xHHHH,
           octal digits, or the quoted character itself).
 Note    : Index must point at the character that follows the '\'.
 Raises  : ERegExpParser when no character follows the '\'.}
function GetQuotedWChar(const Str: String; var Index: Integer): WChar_t;
var
  WCh: WChar_t;
begin
  WCh := GetWChar(Str, Index);
  if WCh = 0 then
    raise ERegExpParser.Create('"\"‚ÌŽŸ‚É‚Í•¶Žš‚ª•K—v‚Å‚·B', Index);

  if WCh = CONST_b then             {'b'}
    result := CONST_BS              {back space}
  else if WCh = CONST_r then        {'r'}
    result := CONST_CR              {carriage return}
  else if WCh = CONST_n then        {'n'}
    result := CONST_LF              {line feed}
  else if WCh = CONST_t then        {'t'}
    result := CONST_TAB             {tab}
  else if WCh = CONST_x then        {'x'}
    result := HexStrToInt(Str, Index)
  else if OctWCharToInt(WCh) >= 0 then begin
    UnGetWChar(Str, Index);         {push the first octal digit back}
    result := OctStrToInt(Str, Index);
  end else
    result := WCh;
end;

{Description: converts hexadecimal notation in Str to a Word value.
 Arguments  : Str: String     source string
              Index: Integer  byte position in Str where conversion starts;
                              left pointing at the first unconsumed character
 Returns    : the value of up to four hexadecimal digits.
 Raises     : ERegExpParser when no hexadecimal digit is found.}
function HexStrToInt(const Str: String; var Index: Integer): Word;
var
  Val, i: Integer;
  WCh: WChar_t;
begin
  result := 0;
  i := 1;
  WCh := GetWChar(Str, Index);
  Val := HexWCharToInt(WCh);
  while (WCh <> CONST_NULL) and (Val >= 0) and (i < 5) do begin
    result := result * 16 + Val;
    WCh := GetWChar(Str, Index);
    Val := HexWCharToInt(WCh);
    Inc(i);
  end;
  if i = 1 then
    raise ERegExpParser.Create('•s³‚È‚P‚Ui”ƒR[ƒh•\‹L‚Å‚·B', Index);
  {The character that ended the loop was read one too far; push it back}
  if WCh <> CONST_NULL then
    UnGetWChar(Str, Index);
end;

{Description: converts octal notation in Str to a Word value.
 Arguments  : Str: String     source string
              Index: Integer  byte position in Str where conversion starts;
                              left pointing at the first unconsumed character
 Returns    : the value of up to six octal digits.
 Raises     : ERegExpParser when no octal digit is found or the value would
              exceed $FFFF.}
function OctStrToInt(const Str: String; var Index: Integer): Word;
var
  Val, i: Integer;
  WCh: WChar_t;
begin
  result := 0;
  i := 1;
  WCh := GetWChar(Str, Index);
  Val := OctWCharToInt(WCh);
  while (WCh <> CONST_NULL) and (Val >= 0) and (i < 7) do begin
    if (result * 8 + Val) > $FFFF then
      raise ERegExpParser.Create('•s³‚È‚Wi”ƒR[ƒh•\‹L‚Å‚·B', Index);
    result := result * 8 + Val;
    WCh := GetWChar(Str, Index);
    Val := OctWCharToInt(WCh);
    Inc(i);
  end;
  if i = 1 then
    raise ERegExpParser.Create('•s³‚È‚Wi”ƒR[ƒh•\‹L‚Å‚·B', Index);
  {The character that ended the loop was read one too far; push it back}
  if WCh <> CONST_NULL then
    UnGetWChar(Str, Index);
end;

{Description: returns the token that starts at byte position Index, using
              FS as the separator character.
 Arguments  : Str: String
              Index: Integer  byte position in Str where the token starts;
                              advanced past the token and its separator
 Returns    : the characters from Index up to (not including) the next FS
              or the end of Str.}
function WCharGetToken(const Str: String; var Index: Integer; FS: WChar_t): String;
var
  WCh: WChar_t;
begin
  result := '';
  WCh := GetWChar(Str, Index);
  while WCh <> 0 do begin
    if WCh = FS then
      break
    else begin
      result := result + WCharToStr(WCh);
      WCh := GetWChar(Str, Index);
    end;
  end;
end;

{Description: puts a '\' in front of every metacharacter in Str.
 Argument   : Str: String
 Returns    : Str with each character listed in METACHARS escaped by '\'.}
function QuoteMetaWChar(Str: String): String;
var
  i, j: Integer;
  WChar: WChar_t;
begin
  result := '';
  i := 1;
  WChar := GetWChar(Str, i);
  while WChar <> 0 do begin
    {Look WChar up in the metacharacter table}
    j := 0;
    while j <= High(METACHARS) do begin
      if METACHARS[j] = WChar then
        break
      else
        Inc(j);
    end;
    if j <= High(METACHARS) then
      result := result + '\' + WCharToStr(WChar)
    else
      result := result + WCharToStr(WChar);
    WChar := GetWChar(Str, i);
  end;

end;

{ -============================ TREScanner Class =================================-}

{Creates a scanner positioned at the start of Str.}
constructor TREScanner.Create(Str: String);
begin
  inherited Create;
  Self.SetRegExpStr(Str);
end;

{Sets the string to scan and restarts scanning from its first byte.
 The character-class flag is cleared as well: if a previous scan was aborted
 by an exception inside '[...]', the flag would otherwise still be set and
 the new string would be tokenised in character-class mode.}
procedure TREScanner.SetRegExpStr(RegExpStr: String);
begin
  FRegExpStr := RegExpStr;
  FIndex := 1;
  FInCharClass := False;
end;

{Function: returns the next token outside a character class.
 Method  : reads characters with GetWChar / UnGetWChar. FSymbol.WChar holds
           the character read (the unescaped character for a '\' escape).
 Note    : the result is any REToken_t value except retk_CharClass.
 Raises  : ERegExpParser when '[' is the last character of the pattern.}
function TREScanner.GetTokenStd: REToken_t;
var
  WChar: WChar_t;
begin
  WChar := GetWChar(FRegExpStr, FIndex);
  FSymbol.WChar := WChar;

  {Translate the character(s) into a token}
  if WChar = CONST_NULL then
    FToken := retk_End
  else if WChar = CONST_DOLLAR then
    FToken := retk_LTail
  else if WChar = CONST_LPAR then
    FToken := retk_LPar
  else if WChar = CONST_RPAR then
    FToken := retk_RPar
  else if WChar = CONST_STAR then
    FToken := retk_Star
  else if WChar = CONST_PLUS then
    FToken := retk_Plus
  else if WChar = CONST_DOT then
    FToken := retk_Dot
  else if WChar = CONST_QMARK then
    FToken := retk_QMark
  else if WChar = CONST_VL then
    FToken := retk_Union
  else if WChar = CONST_RBRA then
    FToken := retk_RBra
  else if WChar = CONST_LBRA then begin
    WChar := GetWChar(FRegExpStr, FIndex);
    if WChar = CONST_NULL then
      raise ERegExpParser.Create('‰Eƒuƒ‰ƒPƒbƒg"]"‚ª•K—v‚Å‚·', FIndex);
    if WChar = CONST_CARET then
      FToken := retk_LBraNeg              {negated character class}
    else begin
      UnGetWChar(FRegExpStr, FIndex);
      FToken := retk_LBra;
    end;
  end
  else if WChar = CONST_YEN then begin
    FToken := retk_Char;
    FSymbol.WChar := GetQuotedWChar(FRegExpStr, FIndex);
  end
  else if WChar = CONST_CARET then begin
    FToken := retk_LHead;
  end else
    FToken := retk_Char;

  result := FToken;
end;

{Function: returns the next token inside a '[...]' character class.
 Method  : reads characters with GetWChar / UnGetWChar and recognises the
           range form 'a-z'.
 Note    : the result is retk_Char, retk_CharClass or retk_RBra.
 Raises  : ERegExpParser when the pattern ends before ']' or when a range has
           its start above its end.}
function TREScanner.GetTokenCC: REToken_t;
var
  WChar, WChar2, WChar3: WChar_t;
begin
  WChar := GetWChar(FRegExpStr, FIndex);
  FSymbol.WChar := WChar;

  {Translate the character(s) into a token}
  if WChar = CONST_NULL then
    raise ERegExpParser.Create('‰Eƒuƒ‰ƒPƒbƒg"]"‚ª•K—v‚Å‚·', FIndex);
  if WChar = CONST_RBRA then
    FToken := retk_RBra
  else begin
    if WChar = CONST_YEN then
      {handle an escape sequence}
      WChar := GetQuotedWChar(FRegExpStr, FIndex);

    {Handle the '-' that denotes a character range}
    FToken := retk_Char;
    WChar2 := GetWChar(FRegExpStr, FIndex);
    if WChar2 = CONST_MINUS then begin
      {the second character is '-'}
      WChar3 := GetWChar(FRegExpStr, FIndex);
      if WChar3 = CONST_NULL then
        {the pattern ends after the '-'}
        raise ERegExpParser.Create('‰Eƒuƒ‰ƒPƒbƒg"]"‚ª•K—v‚Å‚·', FIndex);

      if WChar3 = CONST_RBRA then begin
        {'-' directly before ']' is a literal: return only the first char}
        UnGetWChar(FRegExpStr, FIndex); { push WChar3 back }
        UnGetWChar(FRegExpStr, FIndex); { push WChar2 back }
        FSymbol.WChar := WChar;
      end else begin
        if WChar3 = CONST_YEN then
          WChar3 := GetQuotedWChar(FRegExpStr, FIndex);
        FToken := retk_CharClass;
        if WChar > WChar3 then
          raise ERegExpParser.Create('•s³‚ȃLƒƒƒ‰ƒNƒ^”͈͂ł·', FIndex);
        FSymbol.CharClass.StartChar := WChar;
        FSymbol.CharClass.EndChar := WChar3;
      end
    end else begin
      {the second character is not '-'}
      if WChar2 = CONST_NULL then
        {the pattern ends inside the class}
        raise ERegExpParser.Create('‰Eƒuƒ‰ƒPƒbƒg"]"‚ª•K—v‚Å‚·', FIndex);
      UnGetWChar(FRegExpStr, FIndex);     {push WChar2 back}
      FSymbol.WChar := WChar;
    end;
  end;
  result := FToken;
end;

{Returns the next token, switching between GetTokenStd and GetTokenCC as the
 scan enters ('[' or '[^') and leaves (']') a character class.}
function TREScanner.GetToken: REToken_t;
begin
  if FInCharClass then begin
    result := GetTokenCC;
    if result = retk_RBra then
      FInCharClass := False;
  end else begin
    result := GetTokenStd;
    if (result = retk_LBra) or (result = retk_LBraNeg) then
      FInCharClass := True;
  end;
end;

{Creates the preprocessor with its own scanner for Str and empty token and
 dictionary lists.}
constructor TREPreProcessor.Create(Str: String);
begin
  inherited Create;
  FScanner := TREScanner.Create(Str);
  FTokenList := TList.Create;
  FListOfSynonymDic := TList.Create;
  FListOfFuzzyCharDic := TList.Create;
end;

{Frees the scanner, the token records and the lists owned by the
 preprocessor.}
destructor TREPreProcessor.Destroy;
begin
  FScanner.Free;
  DestroyTokenListItems;
  FTokenList.Free;
  FListOfSynonymDic.Free;
  FListOfFuzzyCharDic.Free;
  inherited Destroy;
end;

{Description: disposes every RETokenInfo_t record held in FTokenList and
              clears the list.
 Note       : counterpart of MakeTokenList.}
procedure TREPreProcessor.DestroyTokenListItems;
var
  i: Integer;
begin
  if FTokenList = nil then
    exit;

  i := 0;
  while i < FTokenList.Count do begin
    Dispose(REpTokenInfo_t(FTokenList.Items[i]));
    FTokenList.Items[i] := nil;
    Inc(i);
  end;
  FTokenList.Clear;
end;

{Description: fills FTokenList with one RETokenInfo_t record per token of the
              scanner's string, each with the byte range it was read from.
 Behaviour  : the last record always has Token = retk_End.
 Note       : counterpart of DestroyTokenListItems.}
procedure TREPreProcessor.MakeTokenList;
var
  pTokenInfo: REpTokenInfo_t;
  prevIndex: Integer;
begin
  prevIndex := FScanner.Index;
  DestroyTokenListItems;
  while FScanner.GetToken <> retk_End do begin
    New(pTokenInfo);
    try
      FTokenList.Add(pTokenInfo);
    except
      {the record is not owned by the list yet, so release it here}
      on Exception do begin
        Dispose(pTokenInfo);
        raise;
      end;
    end;
    with pTokenInfo^ do begin
      Token := FScanner.Token;
      Symbol := FScanner.Symbol;
      FromIndex := prevIndex;
      ToIndex := FScanner.Index;
    end;
    prevIndex := FScanner.Index;
  end;

  {trailing retk_End sentinel}
  New(pTokenInfo);
  try
    FTokenList.Add(pTokenInfo);
  except
    on Exception do begin
      Dispose(pTokenInfo);
      raise;
    end;
  end;
  with pTokenInfo^ do begin
    Token := retk_End;
    Symbol.WChar := CONST_NULL;
    FromIndex := 0;
    ToIndex := 0;
  end;
end;

{Returns the string currently held by the scanner.}
function TREPreProcessor.GetTargetRegExpStr: String;
begin
  result := FScanner.RegExpStr;
end;

{Hands Str to the scanner as the string to preprocess.}
procedure TREPreProcessor.SetTargetRegExpStr(Str: String);
begin
  FScanner.RegExpStr := Str;
end;

{Description: builds FProcessedRegExpStr from the scanner's string, applying
              the synonym dictionaries and then the fuzzy-character
              dictionaries when the corresponding option is enabled.}
procedure TREPreProcessor.Run;
begin
  FProcessedRegExpStr := FScanner.RegExpStr;
  if FUseSynonymDic then begin
    Self.Process(FindSynonym);
    {the second pass works on the output of the first}
    FScanner.RegExpStr := FProcessedRegExpStr;
  end;

  if FUseFuzzyCharDic then
    Self.Process(FindFuzzyWChar);
end;

{Description: worker of Run. Rebuilds FProcessedRegExpStr token by token:
              runs of plain characters are offered to FindFunc and replaced
              by FSynonymStr where it reports a match; everything else is
              copied unchanged.
 Fix        : text inside a negated class '[^...]' is now copied unchanged,
              exactly like text inside '[...]'. Previously only retk_LBra
              switched the "inside class" flag on, so characters in a
              negated class were expanded and the '(a|b)' replacement text
              ended up inside the brackets.}
procedure TREPreProcessor.Process(FindFunc: TREPreProcessorFindFunc);
var
  j, k: Integer;
  TkIndex: Integer;
  Info: RETokenInfo_t;
  InCC: Boolean;
begin
  FProcessedRegExpStr := '';
  MakeTokenList;
  InCC := False;
  TkIndex := 0;
  {examine every token}
  while TkIndex < FTokenList.Count do begin
    Info := REpTokenInfo_t(FTokenList[TkIndex])^;
    {entering a character class ('[' or '[^')}
    if (Info.Token = retk_LBra) or (Info.Token = retk_LBraNeg) then
      InCC := True;

    {leaving the character class}
    if Info.Token = retk_RBra then
      InCC := False;

    {not a plain character, or inside a character class: copy as is}
    if (Info.Token <> retk_Char) or InCC then begin
      FProcessedRegExpStr := FProcessedRegExpStr +
        Copy(FScanner.RegExpStr, Info.FromIndex, Info.ToIndex-Info.FromIndex);
      Inc(TkIndex);
    {plain character}
    end else begin
      j := TkIndex;
      {advance j to the first token that is not a plain character; the
       retk_End sentinel guarantees that the loop stops}
      while REpTokenInfo_t(FTokenList[j])^.Token = retk_Char do
        Inc(j);

      {examine the run of characters one position at a time}
      while TkIndex < j do begin
        k := FindFunc(TkIndex, j);
        if k <> -1 then begin
          {append the replacement and continue after the matched part}
          FProcessedRegExpStr := FProcessedRegExpStr + FSynonymStr;
          TkIndex := k;
        end else begin
          {no match: copy one character and move on}
          Info := REpTokenInfo_t(FTokenList[TkIndex])^;
          FProcessedRegExpStr := FProcessedRegExpStr +
            Copy(FScanner.RegExpStr, Info.FromIndex, Info.ToIndex-Info.FromIndex);
          Inc(TkIndex);
        end;
      end;
      TkIndex := j;
    end;
  end;
end;

{Description: looks for a dictionary entry that matches the tokens starting
              at FromTokenIndex. SynonymDic is a TList of TStrings; each
              TStrings is one group of equivalent words.
              On a match FSynonymStr is set to '(w0|w1|...)' built from the
              whole group with metacharacters escaped.
 Returns    : the token index following the matched word, or -1 when nothing
              matches.}
function TREPreProcessor.ReferToOneList(FromTokenIndex, ToTokenIndex: Integer; SynonymDic: TList): Integer;
var
  StrList: TStrings;
  i, j, k, m: Integer;

  {Compares Str with the tokens from FromTokenIndex up to (not including)
   ToTokenIndex. Returns the token index after the match, or -1 when Str is
   empty, differs, or is longer than the token run.}
  function Match(Str: String): Integer;
  var
    StrIndex, TkIndex: Integer;
    WChar: WChar_t;
  begin
    if Str = '' then begin
      result := -1;
      exit;
    end;

    TkIndex := FromTokenIndex;
    StrIndex := 1;
    WChar := GetWChar(Str, StrIndex);
    while (WChar <> CONST_NULL) and (TkIndex < ToTokenIndex) do begin
      if WChar <> REpTokenInfo_t(FTokenList[TkIndex])^.Symbol.WChar then begin
        result := -1;
        exit;
      end else begin
        Inc(TkIndex);
        WChar := GetWChar(Str, StrIndex);
      end;
    end;
    if WChar = CONST_NULL then
      result := TkIndex
    else
      result := -1;
  end;
begin
  result := -1;
  i := 0;
  while i < SynonymDic.Count do begin
    StrList := TStrings(SynonymDic[i]);
    j := 0;
    while j < StrList.Count do begin
      k := Match(StrList[j]);
      if k <> -1 then begin
        {matched: build the alternation of every word in the group}
        FSynonymStr := '(' + QuoteMetaWChar(StrList[0]);
        m := 1;
        while m < StrList.Count do begin
          FSynonymStr := FSynonymStr + '|' + QuoteMetaWChar(StrList[m]);
          Inc(m);
        end;
        FSynonymStr := FSynonymStr + ')';
        result := k;
        exit;
      end;
      Inc(j);
    end;
    Inc(i);
  end;
end;

{Description: searches every dictionary in FListOfSynonymDic with
              ReferToOneList.
 Returns    : the token index following the matched word, or -1 when nothing
              matches.
 Note       : Run passes this method to Process, which calls it.}
function TREPreProcessor.FindSynonym(FromTokenIndex, ToTokenIndex: Integer): Integer;
var
  i: Integer;
begin
  result := -1;
  i := 0;
  while i < FListOfSynonymDic.Count do begin
    result := ReferToOneList(FromTokenIndex, ToTokenIndex, FListOfSynonymDic[i]);
    if result <> -1 then
      exit;
    Inc(i);
  end;
end;

{Description: searches every dictionary in FListOfFuzzyCharDic with
              ReferToOneList.
 Returns    : the token index following the matched entry, or -1 when
              nothing matches.
 Note       : Run passes this method to Process, which calls it.}
function TREPreProcessor.FindFuzzyWChar(FromTokenIndex, ToTokenIndex: Integer): Integer;
var
  i: Integer;
begin
  result := -1;
  i := 0;
  while i < FListOfFuzzyCharDic.Count do begin
    result := ReferToOneList(FromTokenIndex, ToTokenIndex, FListOfFuzzyCharDic[i]);
    if result <> -1 then
      exit;
    Inc(i);
  end;
end;

{Creates an empty parse tree with its node and leaf lists.}
constructor TREParseTree.Create;
begin
  inherited Create;
  FNodeList := TList.Create;
  FLeafList := TList.Create;
end;

{Disposes every node and leaf, then frees the two lists.}
destructor TREParseTree.Destroy;
begin
  DisposeTree;
  FNodeList.Free;
  FLeafList.Free;
  inherited Destroy;
end;

{Creates an internal node of the parse tree and registers it in FNodeList.
 TheOp is the operation the node stands for, pLeft its left child and pRight
 its right child.}
function TREParseTree.MakeInternalNode(TheOp: REOperation_t; pLeft,
  pRight: REpNode_t): REpNode_t;
begin
  New(result);
  with result^ do begin
    op := TheOp;
    Children.pLeft := pLeft;
    Children.pRight := pRight;
  end;
  try
    FNodeList.Add(result);
  except
    {if the TList runs out of memory, release the new node as well}
    on EOutOfMemory do begin
      Dispose(result);
      raise;
    end;
  end;
end;

{Creates a leaf for the character range aStartChar..aEndChar and registers
 it in FLeafList. If a leaf with exactly the same range already exists, that
 leaf is returned instead of a new one.}
function TREParseTree.MakeLeaf(aStartChar, aEndChar: WChar_t): REpNode_t; {char}
var
  i: Integer;
begin
  {reuse an existing leaf that has the same character range}
  for i := 0 to FLeafList.Count-1 do begin
    if (REpNode_t(FLeafList[i])^.CharClass.StartChar = aStartChar) and
    (REpNode_t(FLeafList[i])^.CharClass.EndChar = aEndChar) then begin
      result := FLeafList[i];
      exit;
    end;
  end;

  New(result);
  with result^ do begin
    op := reop_char;
    CharClass.StartChar := aStartChar;
    CharClass.EndChar := aEndChar;
  end;
  try
    FLeafList.Add(result);
  except
    {if the TList runs out of memory, release the new leaf as well}
    on EOutOfMemory do begin
      Dispose(result);
      raise;
    end;
  end;
end;

{Creates the node for the line-head metacharacter. It has no children but is
 created with MakeInternalNode; WChar is stored as its character range.}
function TREParseTree.MakeLHeadNode(WChar: WChar_t): REpNode_t;
begin
  result := MakeInternalNode(reop_LHead, nil, nil);
  with result^ do begin
    CharClass.StartChar := WChar;
    CharClass.EndChar := WChar;
  end;
end;

{Creates the node for the line-tail metacharacter. It has no children but is
 created with MakeInternalNode; WChar is stored as its character range.}
function TREParseTree.MakeLTailNode(WChar: WChar_t): REpNode_t;
begin
  result := MakeInternalNode(reop_LTail, nil, nil);
  with result^ do begin
    CharClass.StartChar := WChar;
    CharClass.EndChar := WChar;
  end;
end;

{Builds the subtree for the '.' metacharacter (any single character): the
 leaves $01..$09, $0B..$0C and $0E..$FCFC - everything except CR and LF -
 joined by reop_Union nodes.}
function TREParseTree.MakeAnyCharsNode: REpNode_t;
begin
  result := MakeInternalNode(reop_Union, MakeLeaf($1, $09), MakeLeaf($0B, $0C));
  result := MakeInternalNode(reop_Union, result, MakeLeaf($0E, $FCFC));
end;

{Calls MakeLeaf when aStartChar <= aEndChar; otherwise returns nil.}
function TREParseTree.Check_and_MakeLeaf(aStartChar, aEndChar: WChar_t):REpNode_t;
begin
  if aStartChar <= aEndChar then begin
    result := MakeLeaf(aStartChar, aEndChar);
  end else
    result := nil;
end;

{Turns the leaf pLeaf into a reop_Union internal node with the children
 pLeft and pRight, moving it from FLeafList to FNodeList.
 Raises Exception when either child is nil.}
procedure TREParseTree.ChangeLeaftoNode(pLeaf, pLeft, pRight: REpNode_t);
begin
  if (pLeft = nil) or (pRight = nil) then
    raise Exception.Create('TREParseTree : ’v–½“IƒGƒ‰[');{ debug }
  with pLeaf^ do begin
    op := reop_Union;
    Children.pLeft := pLeft;
    Children.pRight := pRight;
  end;
  FLeafList.Remove(pLeaf);
  try
    FNodeList.Add(pLeaf);
  except
    on EOutOfMemory do begin
      {nodes are allocated with New, so release with Dispose}
      Dispose(pLeaf);
      raise;
    end;
  end;
end;

{Function: makes sure that no two leaves have overlapping character ranges.
 Method  : every leaf carries a CharClass record with StartChar and EndChar.
           Each pair of leaves is compared; where two ranges overlap, a leaf
           is split and replaced by an equivalent subtree of reop_Union
           nodes over non-overlapping leaves.}
procedure TREParseTree.ForceCharClassUnique;
var
  i, j: Integer;
  Changed: Boolean;

  {Function: splits two leaves whose character ranges overlap.
   Returns : True when a leaf was split, False when the two ranges are
             disjoint or identical.
   Fix     : the remainder ranges [S1..S2-1] and [SmallE+1..BigE] are only
             built when they are non-empty. The former code computed S2-1
             and SmallE+1 unconditionally, which leaves the range of the
             character type when S2 is the lowest or SmallE the highest
             code (assuming WChar_t is an unsigned 16-bit type) and then
             produced a bogus leaf or a range-check error.}
  function SplitCharClass(pCCLeaf1, pCCLeaf2: REpNode_t): Boolean;
  var
    pNode1, pNode2, pNode3: REpNode_t;
    S1, S2, SmallE, BigE: WChar_t;
  begin
    result := False;
    {pre-condition: make pCCLeaf1 the leaf with the smaller StartChar}
    if pCCLeaf1^.CharClass.StartChar > pCCLeaf2^.CharClass.StartChar then begin
      pNode1 := pCCLeaf1;
      pCCLeaf1 := pCCLeaf2;
      pCCLeaf2 := pNode1;
    end;

    {Nothing to do when the ranges do not overlap or are identical.
     MakeLeaf never creates two identical leaves, but repeated splitting can
     lead to such a pair being compared.}
    if (pCCLeaf1^.CharClass.EndChar < pCCLeaf2^.CharClass.StartChar) or
    (pCCLeaf1^.CharClass.Chars = pCCLeaf2^.CharClass.Chars) then
      exit;

    {S1 (start of pCCLeaf1) <= S2 (start of pCCLeaf2)}
    S1 := pCCLeaf1^.CharClass.StartChar;
    S2 := pCCLeaf2^.CharClass.StartChar;

    {SmallE is the smaller, BigE the larger of the two EndChar values}
    if pCCLeaf1^.CharClass.EndChar > pCCLeaf2^.CharClass.EndChar then begin
      SmallE := pCCLeaf2^.CharClass.EndChar;
      BigE := pCCLeaf1^.CharClass.EndChar;
    end else begin
      SmallE := pCCLeaf1^.CharClass.EndChar;
      BigE := pCCLeaf2^.CharClass.EndChar;
    end;

    {part before the overlap, the overlap itself, part after the overlap}
    if S1 < S2 then
      pNode1 := Check_and_MakeLeaf(S1, S2-1)
    else
      pNode1 := nil;
    pNode2 := Check_and_MakeLeaf(S2, SmallE);
    if SmallE < BigE then
      pNode3 := Check_and_MakeLeaf(SmallE+1, BigE)
    else
      pNode3 := nil;

    if pNode1 = nil then begin      {S1 = S2}
      if pCCLeaf1^.CharClass.EndChar = BigE then
        ChangeLeaftoNode(pCCLeaf1, pNode2, pNode3)
      else
        ChangeLeaftoNode(pCCLeaf2, pNode2, pNode3);
    end else if pNode3 = nil then begin {SmallE = BigE}
      ChangeLeaftoNode(pCCLeaf1, pNode1, pNode2);
    end else begin
      if pCCLeaf1^.CharClass.EndChar = BigE then begin {pCCLeaf1 contains pCCLeaf2}
        ChangeLeaftoNode(pCCLeaf1, MakeInternalNode(reop_Union, pNode1, pNode2),
          pNode3)
      end else begin {pCCLeaf1 and pCCLeaf2 overlap partially}
        ChangeLeaftoNode(pCCLeaf1, pNode1, pNode2);
        ChangeLeaftoNode(pCCLeaf2, pNode2, pNode3);
      end;
    end;
    result := True;
  end;
begin   {procedure TREParseTree.ForceCharClassUnique}
  i := 0;
  while i < LeafList.Count do begin
    j := i + 1;
    Changed := False;
    while j < LeafList.Count do begin
      Changed := SplitCharClass(LeafList[j], LeafList[i]);
      if not Changed then
        Inc(j)
      else
        break;      {the leaf list changed: rescan from the same i}
    end;
    if not Changed then
      Inc(i);
  end;
end;    {procedure TREParseTree.ForceCharClassUnique}

{Disposes every node in FNodeList and FLeafList, clears both lists and
 resets FpHeadNode. The lists themselves are kept.}
procedure TREParseTree.DisposeTree;
var
  i: Integer;
begin
  if FNodeList <> nil then begin
    for i := 0 to FNodeList.Count - 1 do begin
      if FNodeList[i] <> nil then
        Dispose(REpNode_t(FNodeList.Items[i]));
    end;
    FNodeList.Clear;
  end;

  if FLeafList <> nil then begin
    for i := 0 to FLeafList.Count -1 do begin
      if FLeafList[i] <> nil then
        Dispose(REpNode_t(FLeafList[i]));
    end;
    FLeafList.Clear;
  end;
  FpHeadNode := nil;
end;

{-=========================== TREParser Class ===============================-}

{Creates the parser with a scanner for RegExpStr and an empty parse tree.
 Call Run to perform the actual parse.}
constructor TREParser.Create(RegExpStr: String);
begin
  inherited Create;
  FScanner := TREScanner.Create(RegExpStr);
  FParseTree := TREParseTree.Create;
end;

{Frees the scanner and the parse tree.}
destructor TREParser.Destroy;
begin
  FScanner.Free;
  FParseTree.Free;
  inherited Destroy;
end;

{**************************************************************************
  Methods that parse the regular expression
 **************************************************************************}

{Parses the scanner's string into FParseTree and makes the character ranges
 of its leaves unique.
 Raises ERegExpParser when text remains after the expression.}
procedure TREParser.Run;
begin
  FParseTree.DisposeTree; {discard any existing tree and start afresh}

  FScanner.GetToken;      {read the first token}

  {parse the regular expression}
  FParseTree.pHeadNode := regexp;

  {anything other than retk_End here is an error}
  if FScanner.Token <> retk_End then begin
    raise ERegExpParser.Create('³‹K•\Œ»‚É—]•ª‚È•¶Žš‚ª‚ ‚è‚Ü‚·',
      FScanner.Index);
  end;

  FParseTree.ForceCharClassUnique;{split the character classes until unique}
end;

{Parses an alternation X|Y and returns the resulting subtree.}
function TREParser.regexp: REpNode_t;
begin
  result := term;
  while FScanner.Token = retk_Union do begin
    FScanner.GetToken;
    result := FParseTree.MakeInternalNode(reop_union, result, term);
  end;
end;

{Parses a concatenation XY and returns the resulting subtree. An empty term
 (directly followed by '|', ')' or the end) yields a reop_Empty node.}
function TREParser.Term: REpNode_t;
begin
  if (FScanner.Token = retk_Union) or
     (FScanner.Token = retk_RPar) or
     (FScanner.Token = retk_End) then
    result := FParseTree.MakeInternalNode(reop_Empty, nil, nil)
  else begin
    result := factor;
    while (FScanner.Token <> retk_Union) and
          (FScanner.Token <> retk_RPar) and
          (FScanner.Token <> retk_End) do begin
      result := FParseTree.MakeInternalNode(reop_concat, result, factor);
    end;
  end;
end;

{Parses a repetition X*, X+ or X? and returns the resulting subtree.
 X+ is built as X followed by X*, X? as X|empty.}
function TREParser.Factor: REpNode_t;
begin
  result := primary;
  if FScanner.Token = retk_Star then begin
    result := FParseTree.MakeInternalNode(reop_closure, result, nil);
    FScanner.GetToken;
  end else if FScanner.Token = retk_Plus then begin
    result := FParseTree.MakeInternalNode(reop_concat, result,
      FParseTree.MakeInternalNode(reop_closure, result, nil));
    FScanner.GetToken;
  end else if FScanner.Token = retk_QMark then begin
    result := FParseTree.MakeInternalNode(reop_Union, result,
      FParseTree.MakeInternalNode(reop_Empty, nil, nil));
    FScanner.GetToken;
  end;
end;

{Parses a single character, '^', '$', '.', a parenthesised expression (X) or
 a character class, and returns the resulting subtree.
 Raises ERegExpParser on a missing ')' or ']' or an unexpected token.}
function TREParser.Primary: REpNode_t;
begin
  case FScanner.Token of
    retk_Char: begin
        result := FParseTree.MakeLeaf(FScanner.Symbol.WChar, FScanner.Symbol.WChar);
        FScanner.GetToken;
      end;
    retk_LHead: begin
        result := FParseTree.MakeLHeadNode(FScanner.Symbol.WChar);
        FScanner.GetToken;
      end;
    retk_LTail: begin
        result := FParseTree.MakeLTailNode(FScanner.Symbol.WChar);
        FScanner.GetToken;
      end;
    retk_Dot: begin
        result := FParseTree.MakeAnyCharsNode;
        FScanner.GetToken;
      end;
    retk_LPar: begin
        FScanner.GetToken;
        result := regexp;
        if FScanner.Token <> retk_RPar then
          raise ERegExpParser.Create('‰E(•Â‚¶)Š‡ŒÊ‚ª•K—v‚Å‚·', FScanner.Index);
        FScanner.GetToken;
      end;
    retk_LBra, retk_LBraNeg: begin
        if FScanner.Token = retk_LBra then
          result := CharacterClass(FParseTree)
        else
          result := NegativeCharacterClass;
        if FScanner.Token <> retk_RBra then
          raise ERegExpParser.Create('‰Eƒuƒ‰ƒPƒbƒg"]"‚ª•K—v‚Å‚·', FScanner.Index);
        FScanner.GetToken;
      end;
    else
      raise ERegExpParser.Create('•’Ê‚Ì•¶ŽšA‚Ü‚½‚͍¶Š‡ŒÊ"("‚ª•K—v‚Å‚·', FScanner.Index);
  end;
end;

{Parses the contents of a '[...]' class into aParseTree and returns the
 resulting subtree: one leaf per character or range, joined by reop_Union.
 On return the current token is retk_RBra.
 Raises ERegExpParser when the class is empty.}
function TREParser.CharacterClass(aParseTree: TREParseTree): REpNode_t;
  {Creates the leaf that corresponds to the current token; nil for a token
   that is neither retk_Char nor retk_CharClass.}
  function WCharToLeaf: REpNode_t;
  begin
    result := nil;
    case FScanner.Token of
      retk_Char:
        result := aParseTree.MakeLeaf(FScanner.Symbol.WChar, FScanner.Symbol.WChar);

      retk_CharClass:
        result := aParseTree.MakeLeaf(FScanner.Symbol.CharClass.StartChar,
                                      FScanner.Symbol.CharClass.EndChar);
    end;
  end;
begin {function TREParser.CharacterClass}
  {inside a class the scanner returns only retk_RBra, retk_Char or
   retk_CharClass}
  FScanner.GetToken;
  if FScanner.Token = retk_RBra then
    raise ERegExpParser.Create('•s³‚ȃLƒƒƒ‰ƒNƒ^ƒNƒ‰ƒXŽw’è‚Å‚·B', FScanner.Index);

  result := WCharToLeaf;
  FScanner.GetToken;
  while FScanner.Token <> retk_RBra do begin
    result := aParseTree.MakeInternalNode(reop_Union, result, WCharToLeaf);
    FScanner.GetToken;
  end;

end;{function TREParser.CharacterClass}


{Parses the contents of a negated class '[^...]' and returns the resulting
 subtree, computed as  [^abc] = . - [abc] :
 the listed characters are parsed into a scratch tree, the "any character"
 leaves are built in a second scratch tree, every listed range is cut out of
 those leaves, and the leaves that remain are copied into FParseTree joined
 by reop_Union.
 Raises ERegExpParser when the class excludes every character that '.' can
 match, because no subtree can express "matches nothing".}
function TREParser.NegativeCharacterClass: REpNode_t;
var
  aParseTree, aNeg_ParseTree: TREParseTree;
  i: Integer;
  aCharClass: RECharClass_t;

  {Removes the range of pLeaf from the leaves of aNeg_ParseTree.
   Every leaf that overlaps pLeaf is deleted and replaced by the parts of it
   that lie below and above pLeaf's range (appended to the leaf list).
   Fix: a leaf that pLeaf covers completely has no remaining parts and must
   simply be deleted. The original code neither deleted it nor advanced, so
   it looped forever; the 2015/01/07 change only skipped it, which left the
   excluded range inside the negated class (e.g. '[^acb]' still matched
   'b'). The remainder ranges are also only computed when non-empty, so
   StartChar-1 and EndChar+1 can no longer leave the character code range.}
  procedure RemoveCC(pLeaf: REpNode_t);
  var
    i: Integer;
    pANode: REpNode_t;
  begin
    i := 0;
    while i < aNeg_ParseTree.LeafList.Count do begin
      pANode := aNeg_ParseTree.LeafList[i];
      if (pLeaf^.CharClass.EndChar < pANode^.CharClass.StartChar) or
      (pLeaf^.CharClass.StartChar > pANode^.CharClass.EndChar) then
        Inc(i)      {no overlap: keep this leaf}
      else begin
        {part of pANode below pLeaf}
        if pANode^.CharClass.StartChar < pLeaf^.CharClass.StartChar then
          aNeg_ParseTree.MakeLeaf(pANode^.CharClass.StartChar,
                                  pLeaf^.CharClass.StartChar-1);
        {part of pANode above pLeaf}
        if pLeaf^.CharClass.EndChar < pANode^.CharClass.EndChar then
          aNeg_ParseTree.MakeLeaf(pLeaf^.CharClass.EndChar+1,
                                  pANode^.CharClass.EndChar);
        {The overlapped leaf is always dropped. The next leaf moves to
         index i, so i is not advanced; the new leaves were appended and do
         not overlap pLeaf.}
        Dispose(REpNode_t(aNeg_ParseTree.LeafList[i]));
        aNeg_ParseTree.LeafList.Delete(i);
      end;
    end;
  end;
begin
  aParseTree := TREParseTree.Create;
  try
    aNeg_ParseTree := TREParseTree.Create;
    try
      {build the nodes for the characters listed inside '[^ ]'}
      aParseTree.pHeadNode := CharacterClass(aParseTree);
      {make the character ranges of those leaves non-overlapping}
      aParseTree.ForceCharClassUnique;

      {build the "any single character" leaves in aNeg_ParseTree}
      aNeg_ParseTree.MakeAnyCharsNode;

      for i := 0 to aParseTree.LeafList.Count-1 do begin
        {cut each listed range out of the leaves of aNeg_ParseTree}
        RemoveCC(aParseTree.LeafList[i]);
      end;

      if aNeg_ParseTree.LeafList.Count = 0 then
        raise ERegExpParser.Create(
          'Negated character class excludes every character', FScanner.Index);

      {copy the remaining leaves of aNeg_ParseTree into FParseTree}
      aCharClass := REpNode_t(aNeg_ParseTree.LeafList[0])^.CharClass;
      result := FParseTree.MakeLeaf(aCharClass.StartChar, aCharClass.EndChar);
      for i := 1 to aNeg_ParseTree.LeafList.Count-1 do begin
        aCharClass := REpNode_t(aNeg_ParseTree.LeafList[i])^.CharClass;
        result := FParseTree.MakeInternalNode(reop_Union, result,
          FParseTree.MakeLeaf(aCharClass.StartChar, aCharClass.EndChar));
      end;
    finally
      aNeg_ParseTree.Free;
    end;
  finally
    aParseTree.Free;
  end;
end;

{$IFDEF DEBUG}
{Debug helper: formats WChar as the character followed by its code in
 hexadecimal.}
function DebugWCharToStr(WChar: WChar_t): String;
begin
  if WChar > $FF then
    result := ' ' + Chr(Hi(WChar))+Chr(Lo(WChar))+'($' + IntToHex(WChar, 4) + ')'
  else
    result := ' ' + Chr(Lo(WChar))+' ($00' + IntToHex(WChar, 2) + ')';

end;

{Debug method: writes the parse tree into a VCL TOutLine component.
 Take care: a parse tree that is too large makes the TOutLine component
 fail.}
procedure TREParser.WriteParseTreeToOutLine(anOutLine: TOutLine);
  {Adds pTree as a child of the outline item ParentIndex and recurses into
   the children of concatenation, union and closure nodes.}
  procedure SetOutLineRecursive(pTree: REpNode_t; ParentIndex: Integer);
  var
    aStr: String;
    NextParentIndex: Integer;
  begin
    if pTree = nil then
      exit;

    case pTree^.op of
      reop_Char: begin { a character or character range }
          if pTree^.CharClass.StartChar <> pTree^.CharClass.EndChar then
            aStr := DebugWCharToStr(pTree^.CharClass.StartChar)
            + ' ` '+ DebugWCharToStr(pTree^.CharClass.EndChar)
          else
            aStr := DebugWCharToStr(pTree^.CharClass.StartChar);
        end;
      reop_LHead:
          aStr := '•¶“ª '+DebugWCharToStr(pTree^.CharClass.StartChar);
      reop_LTail:
          aStr := '•¶”ö '+DebugWCharToStr(pTree^.CharClass.StartChar);
      reop_Concat:{ XY }
          aStr := '˜AŒ‹ ';
      reop_Union:{ X|Y }
          aStr := '‘I‘ð "|"';
      reop_Closure:{ X* }
          aStr := '•Â•ï "*"';
      reop_Empty:{ empty }
          aStr := '‹ó';
    end;

    NextParentIndex := anOutLine.AddChild(ParentIndex, aStr);

    if pTree^.op in [reop_Concat, reop_Union, reop_Closure] then begin
      SetOutLineRecursive(pTree^.Children.pLeft, NextParentIndex);
      SetOutLineRecursive(pTree^.Children.pRight, NextParentIndex);
    end;
  end;
begin
  anOutLine.Clear;
  SetOutLineRecursive(FParseTree.pHeadNode, 0);
end;

{$ENDIF}

{ -============================== TRE_NFA Class ==================================-}

{Creates the NFA object for Parser with an empty state list. LHeadWChar and
 LTailWChar are stored as the line-head and line-tail character codes.}
constructor TRE_NFA.Create(Parser: TREParser; LHeadWChar, LTailWChar: WChar_t);
begin
  inherited Create;
  FStateList := TList.Create;
  FParser := Parser;
  FLHeadWChar := LHeadWChar;
  FLTailWChar := LTailWChar;
end;

{Releases the NFA state table.}
destructor TRE_NFA.Destroy;
begin
  DisposeStateList;
  inherited Destroy;
end;

{Discards the NFA state table: disposes the chain of nodes linked through
 Next from every entry of FStateList, then frees the list and sets it to
 nil.}
procedure TRE_NFA.DisposeStateList;
var
  i: Integer;
  pNFANode, pNext: RE_pNFANode_t;
begin
  if FStateList <> nil then begin
    for i := 0 to FStateList.Count-1 do begin
      pNFANode := FStateList.Items[i];
      while pNFANode <> nil do begin
        pNext := pNFANode^.Next;
        Dispose(pNFANode);
        pNFANode := pNext;
      end;
    end;
    FStateList.Free;
    FStateList := nil;
  end;
+end; + +{ \•¶–Ø Tree‚ɑΉž‚·‚éNFA‚𐶐¬‚·‚é} +procedure TRE_NFA.Run; +begin + { NFA ‚̏‰Šúó‘Ԃ̃m[ƒh‚ðŠ„‚è“–‚Ä‚éB} + FEntryState := NumberNode; + + { NFA ‚̏I—¹ó‘Ԃ̃m[ƒh‚ðŠ„‚è“–‚Ä‚é } + FExitState := NumberNode; + + { NFA ‚𐶐¬‚·‚é } + GenerateStateList(FParser.ParseTree.pHeadNode, FEntryState, FExitState); +end; + +{ ƒm[ƒh‚ɔԍ†‚ðŠ„‚è“–‚Ä‚é} +function TRE_NFA.NumberNode: Integer; +begin + with FStateList do begin + result := Add(nil); + end; +end; + +{ NFAó‘Ôß ‚ð‚P‚쐬} +function TRE_NFA.MakeNFANode: RE_pNFANode_t; +begin + New(result); +end; + +{ FStateList‚ɏó‘Ô‘JˆÚ‚ð’ljÁ‚·‚éB + ó‘Ô TransFrom ‚ɑ΂µ‚Ä aCharClass“à‚Ì•¶Žš‚ŏó‘Ô TransTo ‚Ö‚Ì‘JˆÚ‚ð’ljÁ‚·‚éB} +procedure TRE_NFA.AddTransition(TransFrom, TransTo: Integer; + aCharClass: RECharClass_t); {Char} +var + pNFANode: RE_pNFANode_t; +begin + pNFANode := MakeNFANode; + + with pNFANode^ do begin + CharClass := aCharClass; + TransitTo := TransTo; + Next := RE_pNFANode_t(FStateList.Items[TransFrom]); + end; + FStateList.Items[TransFrom] := pNFANode; +end; + +{ \•¶–Ø pTree ‚ɑ΂·‚é StateList‚𐶐¬‚·‚é + NFA‚Ì“ü‚èŒû‚ðentry, oŒû‚ðway_out‚Æ‚·‚é } +procedure TRE_NFA.GenerateStateList(pTree: REpNode_t; entry, way_out: Integer); +var + aState1, aState2: Integer; + aCharClass: RECharClass_t; +begin + case pTree^.op of + reop_Char: + AddTransition(entry, way_out, pTree^.CharClass); + reop_LHead: begin {'^'} + {•¶“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^'^' ‚Í TransFrom = FEntryState‚Ì‚Æ‚«ˆÈŠO‚́A + ’ʏí‚̃Lƒƒƒ‰ƒNƒ^‚Æ‚µ‚Ĉµ‚¤B} + if Entry <> FEntryState then begin + AddTransition(entry, way_out, pTree^.CharClass); + end else begin + FRegExpHasLHead := True; + with aCharClass do begin + StartChar := FLHeadWChar; + EndChar := FLHeadWChar; + end; + AddTransition(entry, way_out, aCharClass); + end; + end; + reop_LTail: begin + {s––ƒƒ^ƒLƒƒƒ‰ƒNƒ^ '$'‚́ATransTo = FExitState‚Ì‚Æ‚«ˆÈŠO‚́A + ’ʏí‚̃Lƒƒƒ‰ƒNƒ^‚Æ‚µ‚Ĉµ‚¤B} + if way_out <> FExitState then begin + AddTransition(entry, way_out, pTree^.CharClass); + end else begin + FRegExpHasLTail := True; + with aCharClass do 
begin + StartChar := FLTailWChar; + EndChar := FLTailWChar; + end; + AddTransition(entry, way_out, aCharClass); + end; + end; + reop_Union: begin {'|'} + GenerateStateList(pTree^.Children.pLeft, entry, way_out); + GenerateStateList(pTree^.Children.pRight, entry, way_out); + end; + reop_Closure: begin {'*'} + aState1 := NumberNode; + aState2 := NumberNode; + { ó‘Ô entry ¨ ƒÃ‘JˆÚ ¨ ó‘Ô aState1} + AddTransition(entry, aState1, CONST_EMPTYCharClass); + { ó‘Ô aState1 ¨ (pTree^.Children.pLeft)ˆÈ‰º‚Ì‘JˆÚ ¨ ó‘Ô aState2} + GenerateStateList(pTree^.Children.pLeft, aState1, aState2); + { ó‘Ô aState2 ¨ ƒÃ‘JˆÚ ¨ ó‘Ô aState1} + AddTransition(aState2, aState1, CONST_EMPTYCharClass); + { ó‘Ô aState1 ¨ ƒÃ‘JˆÚ ¨ ó‘Ô way_out} + AddTransition(aState1, way_out, CONST_EMPTYCharClass); + end; + reop_Concat: begin {'AB'} + aState1 := NumberNode; + { ó‘Ô entry ¨ (pTree^.Children.pLeft)‘JˆÚ ¨ ó‘Ô aState1} + GenerateStateList(pTree^.Children.pLeft, entry, aState1); + { ó‘Ô aState1 ¨ (pTree^.Children.pRight)‘JˆÚ ¨ ó‘Ô way_out} + GenerateStateList(pTree^.Children.pRight, aState1, way_out); + end; + reop_Empty: + AddTransition(entry, way_out, CONST_EMPTYCharClass); + else begin + raise Exception.Create('This cannot happen in TRE_NFA.GenerateStateList'); + end; + end; +end; + +{$IFDEF DEBUG} +{TStringsƒIƒuƒWƒFƒNƒg‚ɁANFA ‚Ì“à—e‚ð‘‚«ž‚Þ} +procedure TRE_NFA.WriteNFAtoStrings(Strings: TStrings); +var + i: Integer; + pNFANode: RE_pNFANode_t; + Str: String; +begin + Strings.clear; + Strings.BeginUpDate; + for i := 0 to FStateList.Count-1 do begin + pNFANode := FStateList.items[i]; + if i = EntryState then + Str := Format('ŠJŽn %2d : ', [i]) + else if i = ExitState then + Str := Format('I—¹ %2d : ', [i]) + else + Str := Format('ó‘Ô %2d : ', [i]); + while pNFANode <> nil do begin + if pNFANode^.CharClass.StartChar = CONST_EMPTY then + Str := Str + Format('ƒÃ‘JˆÚ‚Å ó‘Ô %2d ‚Ö :',[pNFANode^.TransitTo]) + else if pNFANode^.CharClass.StartChar <> pNFANode^.CharClass.EndChar then 
+ Str := Str + Format('•¶Žš%s ‚©‚ç%s ‚Å ó‘Ô %2d ‚Ö :', + [DebugWCharToStr(pNFANode^.CharClass.StartChar), + DebugWCharToStr(pNFANode^.CharClass.EndChar), pNFANode^.TransitTo]) + else if pNFANode^.CharClass.StartChar = FLHeadWChar then begin + Str := Str + Format('•¶“ªƒR[ƒh%s ‚Å ó‘Ô %2d ‚Ö :', + [DebugWCharToStr(pNFANode^.CharClass.StartChar), pNFANode^.TransitTo]); + end else if pNFANode^.CharClass.StartChar = FLTailWChar then begin + Str := Str + Format('•¶”öƒR[ƒh%s ‚Å ó‘Ô %2d ‚Ö :', + [DebugWCharToStr(pNFANode^.CharClass.StartChar), pNFANode^.TransitTo]); + end else + Str := Str + Format('•¶Žš%s ‚Å ó‘Ô %2d ‚Ö :', + [DebugWCharToStr(pNFANode^.CharClass.StartChar), pNFANode^.TransitTo]); + + pNFANode := pNFANode^.Next; + end; + Strings.Add(Str); + end; + Strings.EndUpDate; +end; +{$ENDIF} + +{ -========================== TRE_NFAStateSet Class =============================-} +constructor TRE_NFAStateSet.Create(StateMax: Integer); +var + i: Integer; +begin + inherited Create; + FCapacity := StateMax div 8 + 1; + GetMem(FpArray, FCapacity); + for i := 0 to FCapacity-1 do + FpArray^[i] := 0; +end; + +destructor TRE_NFAStateSet.Destroy; +begin + FreeMem(FpArray, FCapacity); + inherited Destroy; +end; + +function TRE_NFAStateSet.Has(StateIndex: Integer): Boolean; +begin + result := (FpArray^[StateIndex div 8] and (1 shl (StateIndex mod 8))) <> 0; +end; + +procedure TRE_NFAStateSet.Include(StateIndex: Integer); +begin + FpArray^[StateIndex div 8] := FpArray^[StateIndex div 8] or + (1 shl (StateIndex mod 8)); +end; + +function TRE_NFAStateSet.Equals(AStateSet: TRE_NFAStateSet): Boolean; +var + i: Integer; +begin + result := False; + for i := 0 to FCapacity - 1 do begin + if FpArray^[i] <> AStateSet.pArray^[i] then + exit; + end; + result := True; +end; + +{ -============================= TRE_DFA Class ==================================-} +constructor TRE_DFA.Create(NFA: TRE_NFA); +begin + inherited Create; + FNFA := NFA; + FStateList := TList.Create; +end; + 
+destructor TRE_DFA.Destroy; +begin + DestroyStateList; + + inherited Destroy; +end; + +{DFAó‘Ԃ̃ŠƒXƒg‚ð”jŠü} +procedure TRE_DFA.DestroyStateList; +var + i: Integer; + pDFA_State: RE_pDFAState_t; + pDFA_StateSub, pNextSub: RE_pDFAStateSub_t; +begin + if FStateList <> nil then begin + for i := 0 to FStateList.Count-1 do begin + pDFA_State := FStateList.Items[i]; + if pDFA_State <> nil then begin + pDFA_StateSub := pDFA_State^.next; + while pDFA_StateSub <> nil do begin + pNextSub := pDFA_StateSub^.next; + Dispose(pDFA_StateSub); + pDFA_StateSub := pNextSub; + end; + pDFA_State^.StateSet.Free; + Dispose(pDFA_State); + end; + end; + FStateList.Free; + FStateList := nil; + end; +end; + +procedure TRE_DFA.Run; +begin + FRegExpHasLHead := FNFA.RegExpHasLHead; + FRegExpHasLTail := FNFA.RegExpHasLTail; + Convert_NFA_to_DFA; {NFAó‘Ô•\‚©‚çDFAó‘Ô•\‚ðì‚é} + StateListSort; {DFAó‘Ô•\‚̐߂ð“ü—̓L[‡‚ɐ®—ñ‚·‚éB¦ŒŸõ‚̍‚‘¬‰»‚Ì‚½‚ß} + CheckIfRegExpIsSimple;{³‹K•\Œ»‚ª’Pƒ‚È•¶Žš—ñ‚©ƒ`ƒFƒbƒN} +end; + +{ NFA‚𓙉¿‚È‚c‚e‚`‚Ö‚Æ•ÏŠ·‚·‚é} +procedure TRE_DFA.Convert_NFA_to_DFA; +var + Initial_StateSet: TRE_NFAStateSet; + t: RE_pDFAState_t; + pDFA_TransNode, pTransNodeHead: RE_pDFATransNode_t; + pDFA_StateSub: RE_pDFAStateSub_t; +begin +{DFA‚̏‰Šúó‘Ô‚ð“o˜^‚·‚é} + Initial_StateSet := TRE_NFAStateSet.Create(FNFA.StateList.Count); + Initial_StateSet.Include(FNFA.EntryState); + {‚m‚e‚`‰Šúó‘Ԃ̏W‡‚ð‹‚ß‚éiƒÃ‘JˆÚ‚àŠÜ‚ށj} + Collect_Empty_Transition(Initial_StateSet); + FpInitialState := Register_DFA_State(Initial_StateSet); + + {–¢ˆ—‚Ì‚c‚e‚`ó‘Ô‚ª‚ ‚ê‚΁A‚»‚ê‚ðŽæ‚èo‚µ‚ďˆ—‚·‚é + ’–Ú‚µ‚Ä‚¢‚é‚c‚e‚`ó‘Ô‚ð‚”‚Æ‚·‚é} + t := Fetch_Unvisited_D_state; + while t <> nil do begin + + {ˆ—Ï‚݂̈ó‚ð•t‚¯‚é} + t^.visited := True; + + {ó‘Ô‚”‚©‚ç‘JˆÚ‰Â”\‚ÈDFAó‘Ô‚ð‚·‚×‚ÄDFA‚É“o˜^‚·‚éB} + pTransNodeHead := Compute_Reachable_N_state(t); + try + pDFA_TransNode := pTransNodeHead; + while pDFA_TransNode <> nil do begin + { NFAó‘ԏW‡‚̃Ã-closure‚ð‹‚ß‚é} + Collect_Empty_Transition(pDFA_TransNode^.ToNFAStateSet); + + 
{ ‘JˆÚî•ñ‚ðDFAó‘ԂɉÁ‚¦‚é} + New(pDFA_StateSub); + with pDFA_StateSub^ do begin + next := nil; + CharClass := pDFA_TransNode^.CharClass; + next := t^.next; + end; + t^.next := pDFA_StateSub; + + {Œ»Ý‚ÌDFAó‘Ô‚©‚ç‚Ì‘JˆÚæ‚̐V‚µ‚¢DFAó‘Ô‚ð“o˜^} + pDFA_StateSub^.TransitTo := + Register_DFA_State(pDFA_TransNode^.ToNFAStateSet); + {Register_DFA_Stateƒƒ\ƒbƒh‚É‚æ‚èToNFAStateSetƒIƒuƒWƒFƒNƒg‚ÍDFA_State‚ɏŠ—L‚³‚ê‚é} + {pDFA_TransNode^.ToNFAStateSet := nil;} + + pDFA_TransNode := pDFA_TransNode^.next; + end; + t := Fetch_Unvisited_D_state; + finally + Destroy_DFA_TransList(pTransNodeHead); + end; + end; +end; + +{ NFAó‘ԏW‡ StateSet ‚ɑ΂µ‚Ä ƒÃ-closure‘€ì‚ðŽÀs‚·‚éB + ƒÃ‘JˆÚ‚Å‘JˆÚ‰Â”\‚È‘S‚Ä‚Ì‚m‚e‚`ó‘Ô‚ð’ljÁ‚·‚é} +procedure TRE_DFA.Collect_Empty_Transition(StateSet: TRE_NFAStateSet); +var + i: Integer; + { NFAó‘ԏW‡ StateSet‚É‚m‚e‚`ó‘Ô ‚“‚ð’ljÁ‚·‚éB + “¯Žž‚É‚m‚e‚`ó‘Ô‚“‚©‚çƒÃ‘JˆÚ‚ňړ®‚Å‚«‚é‚m‚e‚`ó‘Ô‚à’ljÁ‚·‚é} + procedure Mark_Empty_Transition(StateSet: TRE_NFAStateSet; s: Integer); + var + pNFANode: RE_pNFANode_t; + begin + StateSet.Include(s); + pNFANode := FNFA.StateList[s]; + while pNFANode <> nil do begin + if (pNFANode^.CharClass.StartChar = CONST_EMPTY) and + (not StateSet.Has(pNFANode^.TransitTo)) then + Mark_Empty_Transition(StateSet, pNFANode^.TransitTo); + pNFANode := pNFANode^.next; + end; + end; +begin + for i := 0 to FNFA.StateList.Count-1 do begin + if StateSet.Has(i) then + Mark_Empty_Transition(StateSet, i); + end; +end; + +{ NFAó‘ԏW‡ aStateSet ‚ð‚c‚e‚`‚É“o˜^‚µ‚āA‚c‚e‚`ó‘Ԃւ̃|ƒCƒ“ƒ^‚ð•Ô‚·B + aStateSet‚ªI—¹ó‘Ô‚ðŠÜ‚ñ‚Å‚¢‚ê‚΁Aacceptedƒtƒ‰ƒO‚ðƒZƒbƒg‚·‚éB + ‚·‚Å‚ÉaStateSet‚ª‚c‚e‚`‚É“o˜^‚³‚ê‚Ä‚¢‚½‚牽‚à‚µ‚È‚¢} +function TRE_DFA.Register_DFA_State(var aStateSet: TRE_NFAStateSet): RE_pDFAState_t; +var + i: Integer; +begin + { NFAó‘Ô aStateSet ‚ª‚·‚Å‚É‚c‚e‚`‚É“o˜^‚³‚ê‚Ä‚¢‚½‚çA‰½‚à‚µ‚È‚¢‚ŃŠƒ^[ƒ“‚·‚é} + for i := 0 to FStateList.Count-1 do begin + if RE_pDFAState_t(FStateList[i])^.StateSet.Equals(aStateSet) then begin + result := RE_pDFAState_t(FStateList[i]); 
+ exit; + end; + end; + + {DFA‚É•K—v‚ȏî•ñ‚ðƒZƒbƒg‚·‚é} + New(result); + with result^ do begin + StateSet := aStateSet; + visited := False; + if aStateSet.Has(FNFA.ExitState) then + accepted := True + else + accepted := False; + next := nil; + end; + aStateSet := nil; + FStateList.add(result); +end; + +{ ˆ—Ï‚݂̈󂪂‚¢‚Ä‚¢‚È‚¢‚c‚e‚`ó‘Ô‚ð’T‚·B + Œ©‚‚©‚ç‚È‚¯‚ê‚Înil‚ð•Ô‚·B} +function TRE_DFA.Fetch_Unvisited_D_state: RE_pDFAState_t; +var + i: Integer; +begin + + for i := 0 to FStateList.Count-1 do begin + if not RE_pDFAState_t(FStateList[i])^.visited then begin + result := FStateList[i]; + exit; + end; + end; + result := nil; +end; + +{Compute_Reachable_N_state ‚ªì‚é RE_DFATransNode_tŒ^‚̃Šƒ“ƒNƒŠƒXƒg‚ð”jŠü‚·‚é} +procedure TRE_DFA.Destroy_DFA_TransList(pDFA_TransNode: RE_pDFATransNode_t); +var + pNext: RE_pDFATransNode_t; +begin + if pDFA_TransNode <> nil then begin + while pDFA_TransNode <> nil do begin + pNext := pDFA_TransNode^.next; + if pDFA_TransNode^.ToNFAStateSet <> nil then + pDFA_TransNode^.ToNFAStateSet.Free; + Dispose(pDFA_TransNode); + + pDFA_TransNode := pNext; + end; + end; +end; + +{ DFAó‘ÔpDFAState‚©‚ç‘JˆÚ‰Â”\‚ÈNFAó‘Ô‚ð’T‚µ‚āAƒŠƒ“ƒNƒŠƒXƒg‚É‚µ‚Ä•Ô‚·} +function TRE_DFA.Compute_Reachable_N_state(pDFAState: RE_pDFAState_t): RE_pDFATransNode_t; +var + i: Integer; + pNFANode: RE_pNFANode_t; + a, b: RE_pDFATransNode_t; +label + added; +begin + result := nil; +try + {‚·‚ׂĂ̂m‚e‚`ó‘Ô‚ð‡‚É’²‚ׂé} + for i := 0 to FNFA.StateList.Count-1 do begin + + { NFAó‘Ôi‚ªDFAó‘Ô pDFAState‚ÉŠÜ‚Ü‚ê‚Ä‚¢‚ê‚΁AˆÈ‰º‚̏ˆ—‚ðs‚¤} + if pDFAState^.StateSet.Has(i) then begin + + { NFAó‘Ô i ‚©‚ç‘JˆÚ‰Â”\‚È‚m‚e‚`ó‘Ô‚ð‚·‚×‚Ä’²‚ׂăŠƒXƒg‚É‚·‚é} + pNFANode := RE_pNFANode_t(FNFA.StateList[i]); + while pNFANode <> nil do begin + if pNFANode^.CharClass.StartChar <> CONST_EMPTY then begin {ƒÃ‘JˆÚ‚Í–³Ž‹} + a := result; + while a <> nil do begin + if a^.CharClass.Chars = pNFANode^.CharClass.Chars then begin + a^.ToNFAStateSet.Include(pNFANode^.TransitTo); + goto added; + end; + a := a^.next; + 
end; + {ƒLƒƒƒ‰ƒNƒ^ pNFANode^.CharClass.c‚É‚æ‚é‘JˆÚ‚ª“o˜^‚³‚ê‚Ä‚¢‚È‚¯‚ê‚ΒljÁ} + New(b); + with b^ do begin + CharClass := pNFANode^.CharClass; + ToNFAStateSet := TRE_NFAStateSet.Create(FNFA.StateList.Count); + ToNFAStateSet.Include(pNFANode^.TransitTo); + next := result; + end; + result := b; + added: + ; + end; + pNFANode := pNFANode^.next; + end; + end; + end; +except + on EOutOfMemory do begin + Destroy_DFA_TransList(result); {\’z’†‚̃ŠƒXƒg”pŠü} + raise; + end; +end; +end; + +{ó‘ÔƒŠƒXƒg‚̃Šƒ“ƒNƒŠƒXƒg‚𐮗ñ‚·‚é(ƒ}[ƒWEƒ\[ƒg‚ðŽg—p)} +procedure TRE_DFA.StateListSort; +var + i: Integer; + {ƒ}[ƒWEƒ\[ƒgˆ—‚ðÄ‹A“I‚ɍs‚¤} + function DoSort(pCell: RE_pDFAStateSub_t): RE_pDFAStateSub_t; + var + pMidCell, pACell: RE_pDFAStateSub_t; + + {2‚‚̃ŠƒXƒg‚ðƒ\[ƒg‚µ‚È‚ª‚畹‡‚·‚é} + function MergeList(pCell1, pCell2: RE_pDFAStateSub_t): RE_pDFAStateSub_t; + var + Dummy: RE_DFAStateSub_t; + begin + Result := @Dummy; + {‚Ç‚¿‚ç‚©‚̃ŠƒXƒg‚ªA‹ó‚É‚È‚é‚Ü‚Å”½•œ} + while (pCell1 <> nil) and (pCell2 <> nil) do begin + {pCell1 ‚Æ pCell2 ‚ð”äŠr‚µ‚fl³‚¢•û‚ðResult‚ɒljÁ‚µ‚Ä‚¢‚­} + if pCell1^.CharClass.StartChar > pCell2^.CharClass.StartChar then begin + {pCell2‚Ì•û‚ª¬‚³‚¢} + Result^.Next := pCell2; + Result := pCell2; + pCell2 := pCell2^.Next; + end else begin + {pCell1‚Ì•û‚ª¬‚³‚¢} + Result^.Next := pCell1; + Result := pCell1; + pCell1 := pCell1^.Next; + end; + end; + {—]‚Á‚½ƒŠƒXƒg‚ð‚»‚Ì‚Ü‚Üresult ‚ɒljÁ} + if pCell1 = nil then + Result^.Next := pCell2 + else + Result^.Next := pCell1; + + result := Dummy.Next; + end; + + {DoSort–{‘Ì} + begin + if (pCell = nil) or (pCell^.Next = nil) then begin + result := pCell; + exit; {—v‘f‚ª‚P‚A‚Ü‚½‚́A–³‚¢‚Æ‚«‚́A‚·‚®‚É exit} + end; + + {ACell ‚ª‚R”Ԗڂ̃Zƒ‹‚ðŽw‚·‚悤‚É‚·‚éB–³‚¯‚ê‚΁Anil ‚ðŽ‚½‚¹‚é} + {ƒŠƒXƒg‚ª‚Q`‚RŒÂ‚̃Zƒ‹‚ðŽ‚Â‚Æ‚«‚É‚àA•ªŠ„‚ðs‚¤‚悤‚É‚·‚éB} + pACell := pCell^.Next^.Next; + pMidCell := pCell; + {MidCell ‚ªAƒŠƒXƒg‚̐^‚ñ’†‚ ‚½‚è‚̃Zƒ‹‚ðŽw‚·‚悤‚É‚·‚éB} + while pACell <> nil do begin + pMidCell := pMidCell^.Next; + pACell := pACell^.Next; + if pACell 
<> nil then + pACell := pACell^.Next; + end; + + {MidCell ‚ÌŒã‚ë‚ŃŠƒXƒg‚ð‚Q•ªŠ„‚·‚é} + pACell := pMidCell^.Next; + pMidCell^.Next := nil; + + result := MergeList(DoSort(pCell), DoSort(pACell)); + end; +begin {Sort –{‘Ì} + for i := 0 to FStateList.Count-1 do begin + RE_pDFAState_t(FStateList[i])^.next := + DoSort(RE_pDFAState_t(FStateList[i])^.next); + end; +end; + +{‹@”\F Œ»Ý‚̐³‹K•\Œ»‚ªA•’Ê‚Ì•¶Žš—ñ‚©H + •’Ê‚Ì•¶Žš—ñ‚¾‚Á‚½‚çAFRegExpIsSimple = True; FSimpleRegExpStr‚É•¶Žš—ñ‚ɐݒè + ‚»‚êˆÈŠO‚̏ꍇ‚́A FRegExpIsSimple = False;FSimpleRegExpStr = ''} +procedure TRE_DFA.CheckIfRegExpIsSimple; +var + pDFAState: RE_pDFAState_t; + pSub: RE_pDFAStateSub_t; + WChar: WChar_t; +begin + FRegExpIsSimple := False; + FSimpleRegExpStr := ''; + + pDFAState := FpInitialState; + + while pDFAState <> nil do begin + pSub := pDFAState^.next; + if pSub = nil then + break; + if (pSub^.next <> nil) or + {•¡”‚̃Lƒƒƒ‰ƒNƒ^‚ðŽó‚¯“ü‚ê‚é} + (pSub^.CharClass.StartChar <> pSub^.CharClass.EndChar) or + {ƒLƒƒƒ‰ƒNƒ^”ÍˆÍ‚ðŽ‚Â} + (pDFAState^.Accepted and (pSub^.TransitTo <> nil)) + {Žó—Œã‚àƒLƒƒƒ‰ƒNƒ^‚ðŽó‚¯“ü‚ê‚é}then begin + + FSimpleRegExpStr := ''; + exit; + end else begin + WChar := pSub^.CharClass.StartChar; + FSimpleRegExpStr := FSimpleRegExpStr + WCharToStr(WChar); + end; + pDFAState := pSub^.TransitTo; + end; + FRegExpIsSimple := True; +end; + + +{$IFDEF DEBUG} +{TStringsƒIƒuƒWƒFƒNƒg‚ɁADFA ‚Ì“à—e‚ð‘‚«ž‚Þ} +procedure TRE_DFA.WriteDFAtoStrings(Strings: TStrings); +var + i: Integer; + pDFA_State: RE_pDFAState_t; + pDFA_StateSub: RE_pDFAStateSub_t; + Str: String; +begin + Strings.clear; + Strings.BeginUpDate; + for i := 0 to FStateList.Count-1 do begin + pDFA_State := FStateList.items[i]; + if pDFA_State = FpInitialState then + Str := Format('ŠJŽn %2d : ', [i]) + else if pDFA_State^.Accepted then + Str := Format('I—¹ %2d : ', [i]) + else + Str := Format('ó‘Ô %2d : ', [i]); + pDFA_StateSub := pDFA_State^.next; + while pDFA_StateSub <> nil do begin + if pDFA_StateSub^.CharClass.StartChar <> 
pDFA_StateSub^.CharClass.EndChar then + Str := Str + Format('•¶Žš %s ‚©‚ç •¶Žš%s ‚Å ó‘Ô %2d ‚Ö :', + [DebugWCharToStr(pDFA_StateSub^.CharClass.StartChar), + DebugWCharToStr(pDFA_StateSub^.CharClass.EndChar), + FStateList.IndexOf(pDFA_StateSub^.TransitTo)]) + + else if pDFA_StateSub^.CharClass.StartChar = FNFA.LHeadWChar then begin + Str := Str + Format('•¶“ªƒR[ƒh %s ‚Å ó‘Ô %2d ‚Ö :', + [DebugWCharToStr(pDFA_StateSub^.CharClass.StartChar), + FStateList.IndexOf(pDFA_StateSub^.TransitTo)]); + end else if pDFA_StateSub^.CharClass.StartChar = FNFA.LTailWChar then begin + Str := Str + Format('•¶”öƒR[ƒh %s ‚Å ó‘Ô %2d ‚Ö :', + [DebugWCharToStr(pDFA_StateSub^.CharClass.StartChar), + FStateList.IndexOf(pDFA_StateSub^.TransitTo)]); + end else + Str := Str + Format('•¶Žš %s ‚Å ó‘Ô %2d ‚Ö :', + [DebugWCharToStr(pDFA_StateSub^.CharClass.StartChar), + FStateList.IndexOf(pDFA_StateSub^.TransitTo)]); + + pDFA_StateSub := pDFA_StateSub^.Next; + end; + Strings.Add(Str); + end; + Strings.EndUpDate; +end; +{$ENDIF} + +{ -=================== TRegularExpression Class ==============================-} +constructor TRegularExpression.Create(AOwner: TComponent); +begin + inherited Create(AOwner); + FRegExpList := TStringList.Create; + FRegExpListMax := 30; {RegExpList‚̍€–ڐ”Ý’è 30} + {FCurrentIndex = 0 ‚̓kƒ‹‚̐³‹K•\Œ»‚ŏí‚ÉŽg‚¦‚é‚悤‚É‚·‚éB} + FCurrentIndex := FRegExpList.Add(''); + FPreProcessor := TREPreProcessor.Create(''); + Translate(FRegExpList[FCurrentIndex]); +end; + +destructor TRegularExpression.Destroy; +begin + FPreProcessor.Free; + DisposeRegExpList; + inherited Destroy; +end; + +{³‹K•\Œ»ƒŠƒXƒg(FRegExpList: TStringList)‚ÆObjectsƒvƒƒpƒeƒB‚ÉŒ‹‚Ñ•t‚¯‚ç‚ꂽ + TRE_DFAƒIƒuƒWƒFƒNƒg‚ð”jŠü} +procedure TRegularExpression.DisposeRegExpList; +var + i: Integer; +begin + if FRegExpList <> nil then begin + with FRegExpList do begin + for i := 0 to Count-1 do begin + TRE_DFA(Objects[i]).Free; + end; + end; + FRegExpList.Free; + FRegExpList := nil; + end; +end; + +{ ---------------------- 
ƒvƒƒpƒeƒB ƒAƒNƒZƒX ƒƒ\ƒbƒh -----------------} +{RegExpƒvƒƒpƒeƒB‚Ìwriteƒƒ\ƒbƒh} +procedure TRegularExpression.SetRegExp(Str: String); +var + OrigRegExp: String; + function FindRegExpInList(RegExpStr: String): Integer; + var + i: Integer; + begin + result := -1; + i := 0; + while i < FRegExpList.Count do begin + if RegExpStr = FRegExpList[i] then begin + result := i; + exit; + end; + Inc(i); + end; + end; +begin + OrigRegExp := Str;{ƒvƒŠƒvƒƒZƒbƒT‚ð’Ê‚é‘O‚̐³‹K•\Œ»‚ð‘Þ”ð} + with FPreProcessor do begin + TargetRegExpStr := Str; + Run; + Str := ProcessedRegExpStr; + end; + + try + FCurrentIndex := FindRegExpInList(Str); + {FRegExpList“à‚ɃLƒƒƒbƒVƒ…‚³‚ê‚Ä‚¢‚È‚¢‚Æ‚«‚́AƒRƒ“ƒpƒCƒ‹} + if FCurrentIndex = -1 then begin + if FRegExpList.Count = FRegExpListMax then begin + TRE_DFA(FRegExpList.Objects[FRegExpList.Count-1]).Free; + FRegExpList.Delete(FRegExpList.Count-1); + end; + FRegExpList.Insert(1, Str); + FCurrentIndex := 1; + Translate(FRegExpList[1]); + end; + FRegExp := OrigRegExp; + except + {—áŠO‚ª”­¶‚µ‚½‚Æ‚«‚́Aí‚Ƀkƒ‹³‹K•\Œ»‚ðÝ’è‚·‚éB} + on Exception do begin + FCurrentIndex := 0; + FRegExp := ''; + raise; + end; + end; +end; + +{RegExpƒvƒƒpƒeƒB‚Ìreadƒƒ\ƒbƒh} +function TRegularExpression.GetProcessedRegExp: String; +begin + result := FRegExpList[FCurrentIndex]; +end; + +{ListOfFuzzyCharDicƒvƒƒpƒeƒB readƒƒ\ƒbƒh} +function TRegularExpression.GetListOfFuzzyCharDic: TList; +begin + result := FPreProcessor.ListOfFuzzyCharDic; +end; + +{GetListOfSynonymDicƒvƒƒpƒeƒB readƒƒ\ƒbƒh} +function TRegularExpression.GetListOfSynonymDic: TList; +begin + result := FPreProcessor.ListOfSynonymDic; +end; + +{RegExpIsSimpleƒvƒƒpƒeƒB readƒƒ\ƒbƒh} +function TRegularExpression.GetRegExpIsSimple: Boolean; +begin + result := GetCurrentDFA.RegExpIsSimple; +end; + +{SimpleRegExpƒvƒƒpƒeƒB readƒƒ\ƒbƒh} +function TRegularExpression.GetSimpleRegExp: String; +begin + result := GetCurrentDFA.SimpleRegExpStr; +end; + +{HasLHeadƒvƒƒpƒeƒB readƒƒ\ƒbƒh} +function 
TRegularExpression.GetHasLHead: Boolean; +begin + result := GetCurrentDFA.RegExpHasLHead; +end; + +{HasLTailƒvƒƒpƒeƒB writeƒƒ\ƒbƒh} +function TRegularExpression.GetHasLTail: Boolean; +begin + result := GetCurrentDFA.RegExpHasLTail; +end; + +{Œ»Ý‚̐³‹K•\Œ»‚ɑΉž‚·‚éTRE_DFAŒ^ƒIƒuƒWƒFƒNƒg‚𓾂é} +function TRegularExpression.GetCurrentDFA: TRE_DFA; +begin + result := TRE_DFA(FRegExpList.Objects[FCurrentIndex]); +end; + +{DFAó‘Ô•\‚̏‰Šúó‘Ô‚ð•\‚·ƒm[ƒh‚ւ̃|ƒCƒ“ƒ^‚𓾂邱‚Æ‚ª‚Å‚«‚éB} +function TRegularExpression.GetpInitialDFAState: RE_pDFAState_t; +begin + result := TRE_DFA(FRegExpList.Objects[FCurrentIndex]).pInitialState; +end; + +function TRegularExpression.GetUseFuzzyCharDic: Boolean; +begin + result := FPreProcessor.UseFuzzyCharDic; +end; + +procedure TRegularExpression.SetUseFuzzyCharDic(Val: Boolean); +begin + FPreProcessor.UseFuzzyCharDic := Val; + Self.RegExp := FRegExp; {V‚µ‚¢Ý’è‚ōăRƒ“ƒpƒCƒ‹} +end; + +function TRegularExpression.GetUseSynonymDic: Boolean; +begin + result := FPreProcessor.UseSynonymDic; +end; + +procedure TRegularExpression.SetUseSynonymDic(Val: Boolean); +begin + FPreProcessor.UseSynonymDic := Val; + Self.RegExp := FRegExp; {V‚µ‚¢Ý’è‚ōăRƒ“ƒpƒCƒ‹} +end; + +function TRegularExpression.GetLineHeadWChar: WChar_t; +begin + result := CONST_LINEHEAD; +end; + +function TRegularExpression.GetLineTailWChar: WChar_t; +begin + result := CONST_LINETAIL; +end; + +{***** ³‹K•\Œ»•¶Žš—ñ¨\•¶–؍\‘¢¨NFA¨DFA ‚Ì•ÏŠ·‚ðs‚¤ *****} +procedure TRegularExpression.Translate(RegExpStr: String); +var + DFA: TRE_DFA; + Parser: TREParser; + NFA: TRE_NFA; +begin + DFA := nil; + try + Parser := TREParser.Create(RegExpStr); + try + Parser.Run; + NFA := TRE_NFA.Create(Parser, GetLineHeadWChar, GetLineTailWChar); + try + Self.FLineHeadWChar := NFA.LHeadWChar; + Self.FLineTailWChar := NFA.LTailWChar; + NFA.Run; + DFA := TRE_DFA.Create(NFA); + FRegExpList.Objects[FCurrentIndex] := DFA; + TRE_DFA(FRegExpList.Objects[FCurrentIndex]).Run; + finally + NFA.Free; + end; + finally + 
Parser.Free; + end; + except + On Exception do begin + DFA.Free; + FRegExpList.Delete(FCurrentIndex); + FCurrentIndex := 0; + raise; + end; + end; +end; + +{ó‘Ô DFAstate‚©‚當Žš‚ƒ‚É‚æ‚Á‚Ä‘JˆÚ‚µ‚āA‘JˆÚŒã‚̏ó‘Ô‚ð•Ô‚·B + •¶Žš‚ƒ‚É‚æ‚Á‚Ä‘JˆÚo—ˆ‚È‚¯‚ê‚Înil‚ð•Ô‚·} +function TRegularExpression.NextDFAState(DFAState: RE_pDFAState_t; c: WChar_t): RE_pDFAState_t; +var + pSub: RE_pDFAStateSub_t; +begin + {‚P‚‚ÌDFAState‚ªŽ‚ pSub‚̃Šƒ“ƒN‚ł̓Lƒƒƒ‰ƒNƒ^ƒNƒ‰ƒX‚ª¸‡‚É‚È‚ç‚ñ‚Å‚¢‚邱‚Æ + ‚ð‘O’ñ‚Æ‚µ‚Ä‚¢‚éB} + result := nil; + pSub := DFAState^.next; + while pSub <> nil do begin + if c < pSub^.CharClass.StartChar then + exit + else if c <= pSub^.CharClass.EndChar then begin + result := pSub^.TransitTo; + exit; + end; + pSub := pSub^.next; + end; +end; + +constructor TMatchCORE.Create(AOwner: TComponent); +begin + inherited Create(AOwner); + FLineSeparator := mcls_CRLF; +end; + +procedure TMatchCORE.MatchStd(pText: PChar; var pStart, pEnd: PChar); +var + pDFAState: RE_pDFAState_t; + pp: PChar; +begin + pStart := nil; + pEnd := nil; + + {pText‚ªƒkƒ‹•¶Žš—ñ‚ŁA³‹K•\Œ»‚ªƒkƒ‹•¶Žš—ñ‚Ɉê’v‚·‚é‚Æ‚«} + if (Byte(pText^) = CONST_NULL) and GetCurrentDFA.pInitialState.Accepted then begin + pStart := pText; + pEnd := pText; + exit; + end; + + {’–Ú“_‚ð‚P•¶Žš‚‚¸‚炵‚È‚ª‚çƒ}ƒbƒ`‚·‚éÅ¶•”•ª‚ðŒ©‚Â‚¯‚é} + while Byte(pText^) <> CONST_NULL do begin + {DFA‚̏‰Šúó‘Ô‚ðÝ’è} + pDFAState := Self.GetCurrentDFA.pInitialState; + pp := pText; + {DFAó‘Ô•\‚É•¶Žš‚ð“ü—Í‚µ‚Ä‚¢‚Á‚ă}ƒbƒ`‚·‚éÅ’·•”•ª‚ðŒ©‚Â‚¯‚é} + repeat + if pDFAState^.accepted then begin + {I—¹ó‘Ô‚Å‚ ‚ê‚Ώꏊ‚ð‹L˜^‚µ‚Ä‚¨‚­B + Œ‹‰Ê‚Æ‚µ‚ă}ƒbƒ`‚µ‚½Å¶Å’·•”•ª‚ª‹L˜^‚³‚ê‚é} + pStart := pText; + pEnd := pp; + end; + {ŽŸ‚̏ó‘Ô‚É‘JˆÚ} + pDFAState := NextDFAState(pDFAState, PCharGetWChar(pp)); + until pDFAState = nil; + + {ƒ}ƒbƒ`‚µ‚½‚Æ‚«‚ÍExit} + if pStart <> nil then + exit; + + {’–ڈʒu‚ð‚P•¶Žš•ªi‚ß‚éB} + if IsDBCSLeadByte(Byte(pText^)) then + Inc(pText, 2) + else + Inc(pText); + end; + {ƒ}ƒbƒ`‚µ‚È‚¢B} +end; + +procedure TMatchCORE.MatchEX(pText: PChar; var pStart, 
pEnd: PChar); +begin + pStart := pText; + pEnd := MatchHead(pText, GetCurrentDFA.pInitialState); + if pEnd = nil then + MatchEX_Inside(pText, pStart, pEnd); +end; + +procedure TMatchCORE.MatchEX_Inside(pText: PChar; var pStart, pEnd: PChar); +var + DFA: TRE_DFA; + pInitialDFAState: RE_pDFAState_t; +begin + pStart := nil; + pEnd := nil; + + DFA := GetCurrentDFA; + pInitialDFAState := DFA.pInitialState; + while Byte(pText^) <> CONST_NULL do begin + pEnd := MatchInSide(pText, pInitialDFAState); + if pEnd <> nil then begin + pStart := pText; + exit; + end else if (Byte(pText^) = CONST_LF) and + DFA.RegExpHasLHead then begin + pEnd := MatchHead(pText+1, pInitialDFAState); + if pEnd <> nil then begin + pStart := pText+1; + exit; + end; + end; + {’–ڈʒu‚ð‚P•¶Žš•ªi‚ß‚éB} + if IsDBCSLeadByte(Byte(pText^)) then + Inc(pText, 2) + else + Inc(pText); + end; + + if DFA.RegExpHasLTail and (NextDFAState(pInitialDFAState, LineTailWChar) <> nil) then begin + {³‹K•\Œ»‚ª•¶”öƒƒ^ƒLƒƒƒ‰ƒNƒ^‚Ì‚Ý‚Ì‚Æ‚«(RegExp = '$')‚Ì“ÁŽêˆ—} + pStart := pText; + pEnd := pText; + end; + end; + +function TMatchCORE.MatchHead(pText: PChar; pDFAState: RE_pDFAState_t): PChar; +var + pEnd: PChar; +begin +{³‹K•\Œ»‚ªs“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚ñ‚Å‚¢‚é} + if GetCurrentDFA.RegExpHasLHead then begin + result := MatchInSide(pText, NextDFAState(pDFAState, LineHeadWChar)); + if result <> nil then begin + {ƒ}ƒbƒ`‚µ‚½B‚±‚ÌŽž“_‚ŁAresult <> nil Šm’è} + pEnd := result; + {‚³‚ç‚ɁARegExp = '(^Love|Love me tender)'‚ŁAText = 'Love me tender. 
Love me sweet' + ‚̏ꍇ‚ɍō¶Å’·‚Ń}ƒbƒ`‚·‚é‚̂́A'Love me tender'‚Å‚È‚¯‚ê‚΂Ȃç‚È‚¢‚̂ŁA‚»‚Ìˆ×‚Ì + ƒ}ƒbƒ`ŒŸ¸‚ðs‚¤B} + result := MatchInside(pText, pDFAState); + if (result = nil) or (pEnd > result) then + result := pEnd; + end; + end else begin +{³‹K•\Œ»‚ªs“ªƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ðŠÜ‚ñ‚Å‚¢‚È‚¢} + result := MatchInside(pText, pDFAState); + end; +end; + +function TMatchCORE.MatchInside(pText: PChar; pDFAState: RE_pDFAState_t): PChar; +var + pEnd: PChar; + WChar: WChar_t; + pPrevDFAState: RE_pDFAState_t; +begin + result := nil; + pEnd := pText; + + if pDFAState = nil then + exit; + repeat + if pDFAState^.accepted then begin + {I—¹ó‘Ô‚Å‚ ‚ê‚Ώꏊ‚ð‹L˜^‚µ‚Ä‚¨‚­B + Œ‹‰Ê‚Æ‚µ‚ă}ƒbƒ`‚µ‚½Å¶Å’·•”•ª‚ª‹L˜^‚³‚ê‚é} + result := pEnd; + end; + pPrevDFAState := pDFAState; + {DFA‚ðó‘Ô‘JˆÚ‚³‚¹‚é} + WChar := PCharGetWChar(pEnd); + pDFAState := NextDFAState(pDFAState, WChar); + until pDFAState = nil; + + if (IsLineEnd(WChar) or (WChar = CONST_NULL)) and + (NextDFAState(pPrevDFAState, LineTailWChar) <> nil) then begin + {s––ƒƒ^ƒLƒƒƒ‰ƒNƒ^‚ð“ü—Í‚µ‚āAnilˆÈŠO‚ª‹A‚Á‚Ä‚­‚é‚Æ‚«‚Í•K‚¸Aƒ}ƒbƒ`‚·‚é} + result := pEnd; + if WChar <> CONST_NULL then + Dec(result); {CR($0d)‚Ì•ª Decrement} + end; +end; + +function TMatchCORE.IsLineEnd(WChar: WChar_t): Boolean; +begin + result := False; + case FLineSeparator of + mcls_CRLF: result := (WChar = CONST_CR); + mcls_LF: result := (WChar = CONST_LF); + end; +end; + +{ -========================== TAWKStr Class ==================================- } +constructor TAWKStr.Create(AOwner: TComponent); +begin + inherited Create(AOwner); + + ListOfFuzzyCharDic.Add(RE_FuzzyCharDic); {ƒLƒƒƒ‰ƒNƒ^“¯ˆêŽ‹Ž«‘‚ð’ljÁ} +end; + +procedure TAWKStr.SetRegExp(Str: String); +begin + inherited SetRegExp(Str); + if not (HasLHead or HasLTail) then begin + FMatchProc := MatchStd; + end else begin + FMatchProc := MatchEx; + end; +end; + +{•¶Žš—ñ’†‚Ì'\' ‚Å ˆø—p‚³‚ꂽƒLƒƒƒ‰ƒNƒ^‚ðˆ—‚·‚éB \n, \t \\ ...} +function TAWKStr.ProcessEscSeq(Text: String): String; +var + WChar: WChar_t; + Index: 
Integer; +begin + result := ''; + Index := 1; + while Index <= Length(Text) do begin + WChar := GetWChar(Text, Index); + if WChar = Ord('\') then + result := result + WCharToStr(GetQuotedWChar(Text, Index)) + else + result := result + WCharToStr(WChar); + end; +end; + +{Sub, GSubƒƒ\ƒbƒh‚ÅŽg—pB '&'‚ðƒ}ƒbƒ`‚µ‚½•¶Žš—ñ‚É’uŠ·‚¦‚é} +function TAWKStr.Substitute_MatchStr_For_ANDChar(Text: String; MatchStr: String): String; +var + i: Integer; + aStr: String; + WCh, WCh2: WChar_t; +begin + i := 1; + aStr := ''; +{'\&'‚ð'\\&'‚É‚µ‚Ä‚©‚ç} + while i <= Length(Text) do begin + WCh := GetWChar(Text, i); + if WCh = CONST_YEN then begin + aStr := aStr + WCharToStr(WCh); + + WCh := GetWChar(Text, i); + if WCh = CONST_ANP then begin + aStr := Concat(aStr, WCharToStr(CONST_YEN)); + end; + end; + aStr := aStr + WCharToStr(WCh); + end; + +{ƒGƒXƒP[ƒvEƒV[ƒPƒ“ƒX‚ðˆ—} + Text := ProcessEscSeq(aStr); + +{'&' ‚ð MatchStr‚Å’uŠ·‚¦A'\&'‚ð'&'‚É’uŠ·‚¦} + result := ''; + i := 1; + while i <= Length(Text) do begin + WCh := GetWChar(Text, i); + if WCh = CONST_ANP then + result := Concat(result, MatchStr) + else if WCh = CONST_YEN then begin + WCh2 := GetWChar(Text, i); + if WCh2 = CONST_ANP then begin + result := result + WCharToStr(WCh2); + end else begin + if WCh2 <> CONST_NULL then + UnGetWChar(Text, i); + result := result + WCharToStr(WCh); + end; + end else begin + result := result + WCharToStr(WCh); + end; + end; +end; + +function TAWKStr.Match(Text: String; var RStart, RLength: Integer): Integer; +var + pStart, pEnd: PChar; +begin + FMatchProc(PChar(Text), pStart, pEnd); + if pStart = nil then begin + RStart := 0; + RLength := 0; + result := 0; + end else begin + RStart := pStart - PChar(Text)+1; {RStart‚Í‚Pƒx[ƒX} + RLength := pEnd - pStart; + result := RStart; + end; +end; + +{AWK Like function Sub_Raw} +function TAWKStr.Sub(SubText: String; var Text: String): Boolean; +var + pStart, pEnd: PChar; + MatchStr: String; +begin + FMatchProc(PChar(Text), pStart, pEnd); + if pStart <> nil 
then begin
+{matched}
+    MatchStr := Copy(Text, pStart-PChar(Text)+1, pEnd-pStart); {the matched part}
+    Delete(Text, pStart-PChar(Text)+1, pEnd-pStart);
+    {Replace the & characters in SubText with the matched part (MatchStr).}
+    SubText := Substitute_MatchStr_For_ANDChar(SubText, MatchStr);
+    Insert(SubText, Text, pStart-PChar(Text)+1);
+    result := True;
+  end else begin
+{not matched}
+    result := False;
+  end;
+end;
+
+{AWK Like GSubRaw }
+{Replaces every match of the current pattern in Text with SubText (after
+ Substitute_MatchStr_For_ANDChar has processed SubText for each match) and
+ returns the number of replacements. Text is left unchanged when nothing
+ matches.}
+function TAWKStr.GSub(SubText: String; var Text: String): Integer;
+var
+  pStart, pEnd: PChar;
+  ResultText, aText: String;
+  MatchStr: String;
+  WChar: WChar_t;
+begin
+  ResultText := '';  {accumulates the resulting string}
+  aText := Text;     {remaining text that is still to be searched}
+  result := 0;
+  FMatchProc(PChar(aText), pStart, pEnd);
+  if pStart = nil then
+    exit             {nothing matched}
+  else if aText = '' then begin
+    result := 1;     {matched, but Text = ''}
+    Text := Substitute_MatchStr_For_ANDChar(SubText, '');
+    exit;
+  end;
+
+  {matched and Text <> ''}
+  while True do begin
+    ResultText := Concat(ResultText, Copy(aText, 1, pStart-PChar(aText))); {part before the match}
+    MatchStr := Copy(aText, pStart-PChar(aText)+1, pEnd-pStart);           {matched substring}
+    MatchStr := Substitute_MatchStr_For_ANDChar(SubText, MatchStr);
+    ResultText := Concat(ResultText, MatchStr);                            {+ replacement string}
+
+    if pStart = pEnd then begin  {special handling for a match on the empty string}
+      {Copy one whole character so that the scan makes progress. The
+       two-byte form is used only when a second byte really exists;
+       otherwise a trailing lead byte or lone CR would move pEnd past the
+       NUL terminator and String(pEnd) below would read foreign memory.}
+      if (isDBCSLeadByte(Byte(pStart^)) or
+          ((LineSeparator = mcls_CRLF) and (Byte(pStart^) = CONST_CR))) and
+         (Byte((pStart+1)^) <> CONST_NULL) then begin
+        ResultText := Concat(ResultText, Copy(aText, pStart-PChar(aText)+1, 2));
+        Inc(pEnd, 2);
+      end else begin
+        ResultText := Concat(ResultText, Copy(aText, pStart-PChar(aText)+1, 1));
+        if Byte(pEnd^) <> CONST_NULL then
+          Inc(pEnd, 1);
+      end;
+    end;
+    Inc(result);
+
+    WChar := Byte((pEnd-1)^);
+    {Only used to test for Chr($0a), so 2-byte characters need no special
+     care. aText = '' cannot happen here.}
+    aText := String(pEnd);
+    {aText now holds the part that follows the matched substring}
+    if aText = '' then
+      break;
+    if WChar = CONST_LF then begin
+      {the previous character was LF: use FMatchProc for the next search}
+      FMatchProc(PChar(aText), pStart, pEnd);
+      if pStart = nil then
+        break;
+    end else begin
+      MatchEX_Inside(PChar(aText), pStart, pEnd);
+      if pStart = nil then
+        break;
+    end;
+  end;
+  Text := Concat(ResultText, aText);
+end;
+
+{Splits Text at every match of the current pattern and stores the pieces in
+ StrList (which is cleared first). Returns StrList.Count.}
+function TAWKStr.Split(Text: String; StrList: TStrings): Integer;
+var
+  pStart, pEnd: PChar;
+  Str: String;
+begin
+  StrList.Clear; {clear the result string list}
+  Str := '';
+  while Text <> '' do begin
+    FMatchProc(PChar(Text), pStart, pEnd);
+    if pStart = nil then begin
+      {no match: the rest of the text is the last field}
+      StrList.Add(Concat(Str, Text));
+      Str := '';
+      break;
+    end else if (pStart = PChar(Text)) and (pStart = pEnd) then begin
+      {special handling for an empty match at the head: move one character
+       (one or two bytes) into the pending field and retry}
+      if IsDBCSLeadByte(Byte(Text[1])) then begin
+        Str := Concat(Str, Copy(Text, 1, 2));
+        Text := Copy(Text, 3, Length(Text));
+      end else begin
+        Str := Concat(Str, Text[1]);
+        Text := Copy(Text, 2, Length(Text));
+      end;
+    end else begin
+      {matched}
+      StrList.Add(Concat(Str, Copy(Text, 1, pStart-PChar(Text))));
+      Str := '';
+      Text := String(pEnd);
+      if Text = '' then begin
+        {special handling for a match at the very end: add an empty field}
+        StrList.Add('');
+        break;
+      end;
+    end;
+  end;
+  if Str <> '' then
+    StrList.Add(Str);
+  result := StrList.Count;
+end;
+
+{ -=========================== TTxtFile Class ================================-}
+{Opens aFileName read-only, allocates the read buffer and loads the first
+ block. CancelRequest is kept by address; BuffRead and GetThisLine raise
+ EGrepCancel once it becomes True.
+ Raises EFileNotFound when Reset fails with EInOutError. BuffRead raises
+ EEndOfFile when the first read returns no data.}
+constructor TTxtFile.Create(aFileName: String; var CancelRequest: Boolean);
+var
+  SavedFileMode: Integer;
+begin
+  inherited Create;
+  FpCancelRequest := @CancelRequest; {abort midway when CancelRequest is True}
+
+  FBuffSize := 1024*100;   {size of the buffer}
+  FTailMargin := 100;
+
+  FFileName := aFileName;
+  {System.FileMode is a process-wide global that is consulted by Reset.
+   Set it to read-only for this Reset only and restore it afterwards so
+   that later Reset calls elsewhere are not affected.}
+  SavedFileMode := System.FileMode;
+  System.FileMode := 0;    {file access mode = read-only}
+  try
+    AssignFile(FF, FFileName);
+    try
+      Reset(FF, 1);
+    except
+      on E: EInOutError do begin
+        raise EFileNotFound.Create(E.Message);
+      end;
+    end;
+  finally
+    System.FileMode := SavedFileMode;
+  end;
+  FFileOpened := True;     {file-open flag; used by Destroy}
+  FpBuff := AllocMem(FBuffSize+FTailMargin);
+  FpBuff^ := Chr($0a);     {put an LF Chr($0a) in front of the first line of the file}
+  BuffRead(FpBuff+1);
+  Inc(FReadCount);         {account for the leading LF ($0a)}
+  FpBase := FpBuff;
+  FpLineBegin := FpBuff;
+  FpForward := FpBuff;
+  FLineNo := 0;
+end;
+
+{Closes the file if it was opened and releases the read buffer.}
+destructor TTxtFile.Destroy;
+begin
+  if FFileOpened then
+    CloseFile(FF);
+
+  if FpBuff <> nil then begin
+    FreeMem(FpBuff, FBuffSize+FTailMargin);
+  end;
+
+  inherited Destroy;
+end;
+
+{Reads up to FBuffSize bytes from the file to pBuff and NUL-terminates the
+ data. When the end of the file has been reached and the data does not end
+ with LF, an LF is appended and counted in FReadCount.
+ Raises EEndOfFile when nothing was read and EGrepCancel when a cancel has
+ been requested.}
+procedure TTxtFile.BuffRead(pBuff: PChar);
+begin
+  BlockRead(FF, pBuff^, FBuffSize, FReadCount);
+  if FReadCount = 0 then begin
+    {FpLineBegin := FpBase;}
+    raise EEndOfFile.Create('End Of File');
+  end;
+
+  {write a NUL character after the data just read}
+  if not Eof(FF) then begin
+    (pBuff+FReadCount)^ := Chr(0);
+  end else begin
+    if (pBuff+FReadCount-1)^ <> Chr($0a) then begin
+      (pBuff+FReadCount)^ := Chr($0a);
+      (pBuff+FReadCount+1)^ := Chr(0);
+      (pBuff+FReadCount+2)^ := Chr(0);
+      Inc(FReadCount);
+    end else begin
+      (pBuff+FReadCount)^ := Chr(0);
+      (pBuff+FReadCount+1)^ := Char(0);
+    end;
+  end;
+
+  {keep the application responsive and honour a pending cancel request}
+  Application.ProcessMessages;
+  if FpCancelRequest^ then
+    raise EGrepCancel.Create('CancelRequest');
+end;
+
+{Called when FpBase has reached a Chr(0). Ch is the character that was
+ stepped over just before; it is only tested against Chr(0) at end of file.
+ Raises EEndOfFile when the end of the file has been reached.}
+procedure TTxtFile.IncPBaseNullChar(Ch: Char);
+var
+  Distance: Integer;
+begin
+  if FpBase = (PChar(FBrokenLine)+Length(FBrokenLine)) then begin
+    {reached the Chr(0) that terminates FBrokenLine (a String): continue in the buffer}
+    FpBase := FpBuff;
+  end else begin
+    {reached a Chr(0) inside the FpBuff (PChar) buffer}
+    if FpBase < FpBuff+FReadCount then begin
+      {an invalid NUL character Chr(0) inside the file is corrected to Space ($20)}
+      FpBase^ := Chr($20);
+    end else begin
+      {reached the end of the buffer}
+      if Eof(FF) then begin
+        {reached the end of the file}
+        if Ch = Chr(0) then
+          Dec(FpBase);
+        raise EEndOfFile.Create('End Of File');
+      end else begin
+        {more of the file can be read}
+        if (FpLineBegin >= PChar(FBrokenLine)) and
+           (FpLineBegin < (PChar(FBrokenLine)+Length(FBrokenLine))) then begin
+          {FpLineBegin points into FBrokenLine: append the buffer to it and
+           re-base FpLineBegin on the new string data}
+          Distance := FpLineBegin-PChar(FBrokenLine);
+          FBrokenLine := Concat(FBrokenLine, String(FpBuff));
+          FpLineBegin := PChar(FBrokenLine)+Distance;
+          BuffRead(FpBuff);
+          FpBase := FpBuff;
+        end else begin
+          {FpLineBegin points into the buffer, so FBrokenLine is taken from there}
+          FBrokenLine := String(FpLineBegin);
+          BuffRead(FpBuff);
+          FpBase := FpBuff;
+          FpLineBegin := PChar(FBrokenLine);
+        end;
+      end;
+    end;
+  end;
+end;
+
+{Function: increments FpBase so that it points to the next byte, and returns
+ the byte that was stepped over. Updates FpLineBegin and FLineNo after an LF,
+ rewrites a CR that is not followed by LF into LF, and finally sets
+ FpForward to FpBase.}
+function TTxtFile.IncPBase: Char;
+var
+  ApBase: PChar;
+begin
+  result := FpBase^;
+  Inc(FpBase);
+  if FpBase^ = Chr(0) then
+    {NUL character handling}
+    IncPBaseNullChar(result);
+  if result = Chr($0a) then begin
+    {line break handling}
+    if (FpBase < PChar(FBrokenLine)) or (FpBase > (PChar(FBrokenLine) +
+      Length(FBrokenLine))) then begin
+      {FpBase points into the buffer}
+      FBrokenLine := '';
+      FpLineBegin := FpBase;
+      Inc(FLineNo);
+    end else begin
+      {FpBase points into FBrokenLine}
+      FpLineBegin := FpBase;
+      Inc(FLineNo);
+    end;
+  end;
+  if FpBase^ = Chr($0d) then begin
+    ApBase := FpBase;
+    Inc(FpBase);
+    if FpBase^ = Chr(0) then
+      {NUL character handling}
+      {NOTE(review): AdvanceBase passes ApBase^ at this point, this routine
+       passes result - confirm which one is intended.}
+      IncPBaseNullChar(result);
+    if FpBase^ <> Chr($0a) then begin
+      {when CR ($0d) is not followed by LF ($0a), replace the $0d with $0a}
+      if FpBase = FpBuff then
+        FpBase := PChar(FBrokenLine)+Length(FBrokenLine)-1
+      else
+        FpBase := ApBase;
+      FpBase^ := Chr($0a);
+    end
+  end;
+  FpForward := FpBase;
+end;
+
+{Advances FpBase by one character and returns it as WChar_t. For a first
+ byte in $81..$9F or $E0..$FC a second byte is consumed and the result is
+ (first shl 8) or second. FpForward is set to FpBase at the end.}
+function TTxtFile.AdvanceBase: WChar_t;
+var
+  ApBase: PChar;
+  Ch: Char;
+begin
+  {--- begin: IncPBase inlined for speed ---}
+  Ch := FpBase^;
+  Inc(FpBase);
+  if FpBase^ = Chr(0) then
+    {NUL character handling}
+    IncPBaseNullChar(Ch);
+  if Ch = Chr($0a) then begin
+    {line break handling}
+    if (FpBase < PChar(FBrokenLine)) or (FpBase > (PChar(FBrokenLine) +
+      Length(FBrokenLine))) then begin
+      {FpBase points into the buffer}
+      FBrokenLine := '';
+      FpLineBegin := FpBase;
+      Inc(FLineNo);
+    end else begin
+      {FpBase points into FBrokenLine}
+      FpLineBegin := FpBase;
+      Inc(FLineNo);
+    end;
+  end;
+  if FpBase^ = Chr($0d) then begin
+    ApBase := FpBase;
+    Inc(FpBase);
+    if FpBase^ = Chr(0) then
+      {NUL character handling}
+      IncPBaseNullChar(ApBase^);
+    if FpBase^ <> Chr($0a) then begin
+      {when CR ($0d) is not followed by LF ($0a), replace the $0d with $0a}
+      if FpBase = FpBuff then
+        FpBase := PChar(FBrokenLine)+Length(FBrokenLine)-1
+      else
+        FpBase := ApBase;
+      FpBase^ := Chr($0a);
+    end
+  end;
+  {--- end: IncPBase inlined for speed ---}
+  result := Byte(Ch);
+  case result of
+    $81..$9F, $E0..$FC: begin
+      {--- begin: IncPBase inlined for speed ---}
+      Ch := FpBase^;
+      Inc(FpBase);
+      if FpBase^ = Chr(0) then
+        {NUL character handling}
+        IncPBaseNullChar(Ch);
+      if Ch = Chr($0a) then begin
+        {line break handling}
+        if (FpBase < PChar(FBrokenLine)) or (FpBase > (PChar(FBrokenLine) +
+          Length(FBrokenLine))) then begin
+          {FpBase points into the buffer}
+          FBrokenLine := '';
+          FpLineBegin := FpBase;
+          Inc(FLineNo);
+        end else begin
+          {FpBase points into FBrokenLine}
+          FpLineBegin := FpBase;
+          Inc(FLineNo);
+        end;
+      end;
+      if FpBase^ = Chr($0d) then begin
+        ApBase := FpBase;
+        Inc(FpBase);
+        if FpBase^ = Chr(0) then
+          {NUL character handling}
+          IncPBaseNullChar(ApBase^);
+        if FpBase^ <> Chr($0a) then begin
+          {when CR ($0d) is not followed by LF ($0a), replace the $0d with $0a}
+          if FpBase = FpBuff then
+            FpBase := PChar(FBrokenLine)+Length(FBrokenLine)-1
+          else
+            FpBase := ApBase;
+          FpBase^ := Chr($0a);
+        end
+      end;
+      {--- end: IncPBase inlined for speed ---}
+      result := (result shl 8) or Byte(Ch);
+    end;
+  end;
+  FpForward := FpBase;
+end;
+
+{Called when FpForward has reached a Chr(0). Ch is the character that was
+ stepped over just before. Unlike IncPBaseNullChar this routine does not
+ raise EEndOfFile at the end of the file; it simply returns.}
+procedure TTxtFile.GetCharNullChar(Ch: Char);
+var
+  Distance, Distance2: Integer;
+begin
+  if FpForward = (PChar(FBrokenLine)+Length(FBrokenLine)) then begin
+    {reached the Chr(0) that terminates FBrokenLine (a String): continue in the buffer}
+    FpForward := FpBuff;
+  end else begin
+    {reached a Chr(0) inside the FpBuff buffer}
+    if FpForward < FpBuff+FReadCount then begin
+      {an invalid NUL character Chr(0) inside the file is turned into Space ($20)}
+      FpForward^ := Chr($20);
+    end else begin
+      {reached the end of the buffer}
+      if Eof(FF) then begin
+        {the end of the file has already been reached}
+        if Ch = Chr(0) then
+          Dec(FpForward); {so that Chr(0) keeps being returned from now on}
+        exit;
+      end else begin
+        {more of the file can be read}
+        if (FpLineBegin >= PChar(FBrokenLine)) and
+           (FpLineBegin < PChar(FBrokenLine)+Length(FBrokenLine)) then begin
+          {FpLineBegin points into FBrokenLine}
+          Distance := FpLineBegin-PChar(FBrokenLine);
+          if (FpBase >= PChar(FBrokenLine)) and
+             (FpBase < PChar(FBrokenLine)+Length(FBrokenLine)) then
+            {FpBase points into FBrokenLine as well}
+            Distance2 := FpBase-PChar(FBrokenLine)
+          else
+            {FpBase points into the buffer}
+            Distance2 := Length(FBrokenLine)+FpBase-FpBuff;
+          FBrokenLine := Concat(FBrokenLine, String(FpBuff));
+          FpLineBegin := PChar(FBrokenLine)+Distance;
+          FpBase := PChar(FBrokenLine)+Distance2;
+          BuffRead(FpBuff);
+          FpForward := FpBuff;
+        end else begin
+          {FpLineBegin points into the buffer}
+          FBrokenLine := String(FpLineBegin);
+          FpBase := PChar(FBrokenLine)+(FpBase-FpLineBegin);
+          FpLineBegin := PChar(FBrokenLine);
+          BuffRead(FpBuff);
+          FpForward := FpBuff;
+        end;
+      end;
+    end;
+  end;
+end;
+
+{Returns the byte at FpForward and advances FpForward. For a CR followed by
+ LF the LF is consumed and returned; a CR that is not followed by LF is
+ rewritten to LF in place, FpForward is moved back onto it and LF is
+ returned.}
+function TTxtFile.GetChar: Char;
+var
+  ApForward: PChar;
+begin
+  ApForward := FpForward;
+  result := FpForward^;
+  Inc(FpForward);
+  {NUL character handling}
+  if FpForward^ = Chr(0) then
+    GetCharNullChar(result);
+
+  if result = Chr($0d) then begin
+    if FpForward^ <> Chr($0a) then begin
+      {when CR ($0d) is not followed by LF ($0a), replace the $0d with $0a}
+      if FpForward = FpBuff then
+        FpForward := PChar(FBrokenLine)+Length(FBrokenLine)-1
+      else
+        FpForward := ApForward;
+      FpForward^ := Chr($0a);
+      result := Chr($0a);
+    end else begin
+      result := FpForward^;
+      Inc(FpForward);
+      {NUL character handling}
+      if FpForward^ = Chr(0) then
+        GetCharNullChar(result);
+    end;
+  end;
+end;
+
+{Same as GetChar, but returns a WChar_t: for a first byte in $81..$9F or
+ $E0..$FC a second byte is consumed and the result is
+ (first shl 8) or second.}
+function TTxtFile.GetWChar: WChar_t;
+var
+  ApForward: PChar;
+  Ch: Char;
+begin
+  ApForward := FpForward;
+  Ch := FpForward^;
+  Inc(FpForward);
+  {NUL character handling}
+  if FpForward^ = Chr(0) then
+    GetCharNullChar(Ch);
+
+  if Ch = Chr($0d) then begin
+    if FpForward^ <> Chr($0a) then begin
+      {when CR ($0d) is not followed by LF ($0a), replace the $0d with $0a}
+      if FpForward = FpBuff then
+        FpForward := PChar(FBrokenLine)+Length(FBrokenLine)-1
+      else
+        FpForward := ApForward;
+      FpForward^ := Chr($0a);
+      Ch := Chr($0a);
+    end else begin
+      Ch := FpForward^;
+      Inc(FpForward);
+      {NUL character handling}
+      if FpForward^ = Chr(0) then
+        GetCharNullChar(Ch);
+    end;
+  end;
+  result := Byte(Ch);
+  case result of
+    $81..$9F, $E0..$FC: begin
+      Ch := FpForward^;
+      Inc(FpForward);
+      {NUL character handling}
+      if FpForward^ = Chr(0) then
+        GetCharNullChar(Ch);
+      result := (result shl 8) or Byte(Ch);
+    end;
+  end;
+end;
+
+{Advances FpBase to the LF that ends the current line and returns the line
+ text (trailing CR/LF removed) together with FLineNo.
+ Raises EGrepCancel when a cancel has been requested; IncPBase may raise
+ EEndOfFile.}
+function TTxtFile.GetThisLine: RE_LineInfo_t;
+var
+  i: Integer;
+begin
+  Application.ProcessMessages;
+  if FpCancelRequest^ then
+    raise EGrepCancel.Create('CancelRequest');
+
+  {find the end of the line}
+  while FpBase^ <> Chr($0a) do begin
+    IncPBase;
+  end;
+
+  if (FpLineBegin >= PChar(FBrokenLine)) and
+     (FpLineBegin < PChar(FBrokenLine)+Length(FBrokenLine)) then begin
+    {FpLineBegin points into FBrokenLine}
+    if (FpBase >= PChar(FBrokenLine)) and
+       (FpBase < PChar(FBrokenLine)+Length(FBrokenLine)) then begin
+      {FpBase points into FBrokenLine as well}
+      result.Line := Copy(FBrokenLine, FpLineBegin-PChar(FBrokenLine)+1,
+        FpBase-FpLineBegin);
+    end else begin
+      {FpBase points into the buffer: the line is the rest of FBrokenLine
+       followed by the buffer contents up to FpBase}
+      SetString(result.Line, FpBuff, FpBase-FpBuff);
+      result.Line := Concat(Copy(FBrokenLine, FpLineBegin-PChar(FBrokenLine)+1,
+        Length(FBrokenLine)), result.Line);
+    end;
+  end else begin
+    SetString(result.Line, FpLineBegin, FpBase-FpLineBegin);
+  end;
+
+  {TrimRight}
+  i := Length(result.Line);
+  while (i > 0) and (result.Line[i] in [Chr($0d), Chr($0a)]) do Dec(i);
+  result.Line := Copy(result.Line, 1, i);
+
+  result.LineNo := FLineNo;
+end;
+
+{Stores the characters that GetWChar(Str, i) yields into pWCharArray^,
+ followed by a terminating 0, and returns the number of characters stored
+ (the terminator is not counted). The caller must provide room for all
+ characters plus the terminator.}
+function StringToWordArray(Str: String; pWCharArray: PWordArray): Integer;
+var
+  i, j: Integer;
+  WChar: WChar_t;
+begin
+  i := 1;
+  j := 0;
+  WChar := GetWChar(Str, i);
+  while WChar <> 0 do begin
+    pWCharArray^[j] := WChar;
+    Inc(j);
+    WChar := GetWChar(Str, i);
+  end;
+  pWCharArray^[j] := 0;
+  result := j;
+end;
+
+{Creates the component and registers RE_FuzzyCharDic in ListOfFuzzyCharDic.}
+constructor TGrep.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+
+  ListOfFuzzyCharDic.Add(RE_FuzzyCharDic); {add the character-equivalence dictionary}
+end;
+
+{Sets the pattern and selects the search routine: GrepByStr when
+ RegExpIsSimple is True, GrepByRegExp otherwise.}
+procedure TGrep.SetRegExp(Str: String);
+begin
+  inherited SetRegExp(Str);
+  if Self.RegExpIsSimple then
+    FGrepProc := GrepByStr
+  else
+    FGrepProc := GrepByRegExp;
+end;
+
+{Returns CONST_LF.}
+function TGrep.GetLineHeadWChar: WChar_t;
+begin
+  result := CONST_LF;
+end;
+
+{Returns CONST_LF.}
+function TGrep.GetLineTailWChar: WChar_t;
+begin
+  result := CONST_LF;
+end;
+
+{Scans FileName with the current DFA and calls FOnMatch once for every line
+ on which a match is found. Does nothing when FOnMatch is not assigned.
+ EEndOfFile is handled here; other exceptions (for example EGrepCancel) are
+ passed on to the caller.}
+procedure TGrep.GrepByRegExp(FileName: String);
+var
+  TxtFile: TTxtFile;
+  pDFAState,pInitialDFAState: RE_pDFAState_t;
+  LineInfo: RE_LineInfo_t;
+  DFA: TRE_DFA;
+  WChar: WChar_t;
+  pSub: RE_pDFAStateSub_t;
+begin
+  {do nothing when no OnMatch event handler is assigned}
+  if not Assigned(FOnMatch) then
+    exit;
+
+  FCancel := False;
+  DFA := GetCurrentDFA;
+  pInitialDFAState := DFA.pInitialState;
+  try
+    TxtFile := TTxtFile.Create(FileName, Self.FCancel);
+  except on EEndOfFile do exit; {exit when the file size is 0} end;
+
+  try
+    try
+      {search}
+      while True do begin
+        repeat
+          WChar := TxtFile.AdvanceBase;
+          {--- begin: NextDFAState method inlined ---}
+          pDFAState := nil;
+          pSub := pInitialDFAState^.next;
+          while pSub <> nil do begin
+            if WChar < pSub^.CharClass.StartChar then
+              break
+            else if WChar <= pSub^.CharClass.EndChar then begin
+              pDFAState := pSub^.TransitTo;
+              break;
+            end;
+            pSub := pSub^.next;
+          end;
+          {--- end: NextDFAState method inlined ---}
+        until pDFAState <> nil;
+
+        while True do begin
+          if pDFAState^.accepted then begin
+            {matched}
+            LineInfo := TxtFile.GetThisLine;
+            FOnMatch(Self, LineInfo);
+            break;
+          end;
+
+          {make the DFA perform a state transition}
+          pDFAState := NextDFAState(pDFAState, TxtFile.GetWChar);
+          if pDFAState = nil then begin
+            break;
+          end;
+        end;
+      end;
+    finally TxtFile.Free; end;
+  except on EEndOfFile do ; end; {Catch EEndOfFile}
+end;
+
+{Scans FileName for the literal pattern SimpleRegExp and calls FOnMatch once
+ for every line on which it is found. Does nothing when FOnMatch is not
+ assigned or when the pattern contains no characters.}
+procedure TGrep.GrepByStr(FileName: String);
+var
+  TxtFile: TTxtFile;
+  Pattern: String;
+  pPat: PWordArray;
+  PatLen: Integer;
+  i: Integer;
+  LineInfo: RE_LineInfo_t;
+begin
+  FCancel := False;
+  Pattern := Self.SimpleRegExp;
+  {do nothing when no OnMatch event handler is assigned}
+  if not Assigned(FOnMatch) then
+    exit;
+
+  try
+    TxtFile := TTxtFile.Create(FileName, Self.FCancel);
+  except on EEndOfFile do exit; {exit when the file size is 0} end;
+
+  try
+    pPat := AllocMem(Length(Pattern)*2+2);
+    try
+      PatLen := StringToWordArray(Pattern, pPat);
+      {With PatLen = 0 the loop below would compare against pPat^[1], which
+       may lie outside the allocation. Both finally blocks still run.}
+      if PatLen = 0 then
+        exit;
+      try
+        while True do begin
+          {skip forward to the next occurrence of the first pattern character}
+          while (TxtFile.AdvanceBase <> Word(pPat^[0])) do
+            ;
+          i := 1;
+          while True do begin
+            if i = PatLen then begin
+              {the whole pattern has been matched}
+              LineInfo := TxtFile.GetThisLine;
+              FOnMatch(Self, LineInfo);
+              break;
+            end;
+            if TxtFile.GetWChar = Word(pPat^[i]) then
+              Inc(i)
+            else
+              break;
+          end;
+        end;
+      except on EEndOfFile do ;{Catch EEndOfFile} end;
+    finally FreeMem(pPat, Length(Pattern)*2+2); end;
+  finally TxtFile.Free; end;
+end;
+
+{Builds RE_FuzzyCharDic: a TList with one TStringList per entry of
+ REFuzzyWChars, each filled through CommaText.}
+procedure MakeFuzzyCharDic;
+var
+  StrList: TStrings;
+  i: Integer;
+begin
+  RE_FuzzyCharDic := TList.Create;
+
+  for i := 0 to High(REFuzzyWChars) do begin
+    StrList := TStringList.Create;
+    try
+      RE_FuzzyCharDic.Add(StrList);
+    except
+      on Exception do begin
+        {the list does not own StrList yet, so free it here}
+        StrList.Free;
+        raise;
+      end;
+    end;
+
+    StrList.CommaText := REFuzzyWChars[i];
+  end;
+end;
+
+{Frees every string list stored in RE_FuzzyCharDic and the list itself.
+ The variable is reset to nil afterwards; a nil list is ignored.}
+procedure DestroyFuzzyCharDic;
+var
+  i: Integer;
+begin
+  if RE_FuzzyCharDic = nil then
+    exit;
+  for i := 0 to RE_FuzzyCharDic.Count-1 do
+    TStringList(RE_FuzzyCharDic[i]).Free;
+  RE_FuzzyCharDic.Free;
+  RE_FuzzyCharDic := nil;
+end;
+
+{Registers TGrep and TAWKStr on the 'RegExp' component palette page.}
+procedure Register;
+begin
+  RegisterComponents('RegExp', [TGrep, TAWKStr]);
+end;
+
+initialization
+  MakeFuzzyCharDic;
+
+finalization
+  DestroyFuzzyCharDic;
+
+end.
diff --git a/gikoNavi.res b/gikoNavi.res
index 44348ad0b080e245113ed31b247c682ed1292484..067a66444d2de6c5e43a6a08c3ed07b641e39c09 100644
GIT binary patch
delta 29
lcmcbkct>%=8(z+EW(Ec~21bVP&0l#r85vDC|L42Q2mp^>2{-@%

delta 29
lcmcbkct>%=8(z*ZW(Ec~21bUk&0l#r85vDB|L42Q2mp^Z2{ix!

-- 
2.11.0