using System;
using System.Collections.Generic;
using System.Text;

namespace NT2chView
{
    /// <summary>
    /// Splits a seed string into tokens (maximal runs of characters of the same
    /// class, with separator characters dropped) and scores other strings by how
    /// many of those tokens they contain.
    /// </summary>
    class NTSimString
    {
        // Character classes returned by getToken.
        const int TOKEN_SPACE = 0;     // separator: never part of a token
        const int TOKEN_HIRAKANA = 1;  // hiragana / katakana listed in HIRAKANA_CHARS
        const int TOKEN_NUMALPHA = 2;  // digits, Latin letters and a few symbols
        const int TOKEN_OTHER = 3;     // everything not listed in any table

        // NOTE(review): the previous tables listed many entries twice (for example
        // '0','0' or 'ラ','ら','ラ'). The duplicates are collapsed here because they
        // cannot change classification. Presumably one entry of each pair was
        // originally a full-width / half-width variant that was lost in an
        // encoding conversion -- confirm against the upstream source.
        const string SEPARATOR_CHARS =
            " ()【】「」{}〈〉《》『』,.、。★※*?!・";

        const string HIRAKANA_CHARS =
            "ーあアぁァいイぃィうウぅゥえエぇェおオぉォをヲ" +
            "かカヵきキくクけケヶこコ" +
            "さサしシすスせセそソ" +
            "たタちチつツっッてテとト" +
            "なナにニぬヌねネのノ" +
            "はハワわひヒふフへヘほホ" +
            "まマみミむムめメもモ" +
            "やヤゃャゆユゅュよヨょョ" +
            "らラりリるルれレろロんン" +
            "がガぎギぐグげゲごゴ" +
            "ざザじジぢヂずズづヅ" +
            "ぜゼぞゾだダでデどド" +
            "ばバびビぶブべベぼボ" +
            "ぱパぴピぷプぺペぽポ";

        const string NUMALPHA_CHARS =
            "-_0123456789" +
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
            "abcdefghijklmnopqrstuvwxyz" +
            "%$#@+";

        // Character -> class lookup, built once so classification is a single
        // hash lookup instead of three linear scans per character.
        static readonly Dictionary<char, int> sTokenTable = buildTokenTable();

        // Tokens extracted from the seed, in order of appearance. Never contains
        // an empty string.
        readonly List<string> mSplitedString;

        /// <summary>
        /// Tokenizes <paramref name="seed"/>. A token is a maximal run of
        /// consecutive characters that share the same non-separator class;
        /// separator characters end the current token and are discarded.
        /// </summary>
        /// <param name="seed">Text to tokenize. A null or empty seed yields no tokens.</param>
        public NTSimString(String seed){
            mSplitedString = new List<string>();
            if(String.IsNullOrEmpty(seed)){
                return;
            }

            // TOKEN_SPACE as the run class means "no token in progress".
            int runToken = TOKEN_SPACE;
            int runStart = 0;
            for(int i = 0; i < seed.Length; i++){
                int token = getToken(seed[i]);
                if(token == runToken){
                    continue;
                }
                // The class changed: flush the run that just ended, if any.
                if(runToken != TOKEN_SPACE){
                    mSplitedString.Add(seed.Substring(runStart, i - runStart));
                }
                runToken = token;
                runStart = i;
            }
            // Flush the run that reaches the end of the seed.
            if(runToken != TOKEN_SPACE){
                mSplitedString.Add(seed.Substring(runStart));
            }
        }

        /// <summary>
        /// Scores <paramref name="source"/> against the seed tokens.
        /// </summary>
        /// <param name="source">Text to search. A null or empty source scores 0.</param>
        /// <returns>
        /// The sum of the lengths of all seed tokens that occur somewhere in
        /// <paramref name="source"/>. A token repeated in the seed is counted once
        /// per repetition.
        /// </returns>
        public int match(String source){
            if(String.IsNullOrEmpty(source)){
                return 0;
            }

            int match_len = 0;
            foreach(string line in mSplitedString){
                // Culture-sensitive, case-insensitive search (unchanged from the
                // original implementation).
                if(0 <= source.IndexOf(line, StringComparison.CurrentCultureIgnoreCase)){
                    match_len += line.Length;
                }
            }
            return match_len;
        }

        /// <summary>
        /// Debug hook. Currently does nothing: the debug output that used to be
        /// here is disabled.
        /// </summary>
        public void log(){
        }

        /// <summary>
        /// Classifies a character as one of the TOKEN_* classes. Characters that
        /// are not listed in any table are TOKEN_OTHER.
        /// </summary>
        static int getToken(char c){
            int token;
            if(sTokenTable.TryGetValue(c, out token)){
                return token;
            }
            return TOKEN_OTHER;
        }

        /// <summary>
        /// Builds the character lookup table. Tables are registered in priority
        /// order (separator, kana, alphanumeric), so a character listed in more
        /// than one table keeps the first class it was given.
        /// </summary>
        static Dictionary<char, int> buildTokenTable(){
            Dictionary<char, int> table = new Dictionary<char, int>();
            registerChars(table, SEPARATOR_CHARS, TOKEN_SPACE);
            registerChars(table, HIRAKANA_CHARS, TOKEN_HIRAKANA);
            registerChars(table, NUMALPHA_CHARS, TOKEN_NUMALPHA);
            return table;
        }

        /// <summary>
        /// Maps every character of <paramref name="chars"/> to
        /// <paramref name="token"/> unless it is already registered.
        /// </summary>
        static void registerChars(Dictionary<char, int> table, string chars, int token){
            foreach(char c in chars){
                if(!table.ContainsKey(c)){
                    table.Add(c, token);
                }
            }
        }
    }
}