DCHookTest/StringLib.cpp

   1 //---------------------------------------------------------------------------
   2 #include <vcl.h>
   3 #pragma hdrstop
   4
   5 #include "StringLib.h"
   6
   7 //---------------------------------------------------------------------------
   8 #pragma package(smart_init)
   9
  10 // pos\82©\82ç\92P\8cê\82ð\90Ø\82è\8fo\82·
  11 // pos\82Ístr\82©\82ç\82Ì\83I\83t\83Z\83b\83g
  12 // start,end\82Ístr\82©\82ç\82Ì\83I\83t\83Z\83b\83g\82ð\95Ô\82·
  13 // prevstart\82Í\81Astart\82Ì\82P\82Â\91O\82Ì\92P\8cê\81A\96³\82¢\8fê\8d\87\82Ístart\82Æ\93¯\82¶\92l
  14 // fLongest\82ªTRUE\82Ì\8fê\8d\87\82Í\81A\8dÅ\8d\8210\8cê\82Ü\82Åget\82·\82é(.)\82ð\8fI\82í\82è\82Æ\82Ý\82È\82·
  15 // about   : \82¢\82¢\89Á\8c¸\82È\82Æ\82±\82ë\82ð\83N\83\8a\83b\83N\82µ\82Ä\82à\92P\8cê\82³\82¦\82 \82ê\82Î\95K\82¸get\82·\82é
  16 // alnum   : \89p\90\94\8e\9a\88È\8aO\82Í\96³\8e\8b
  17 // numPrev : N\8cê\91O\82Ì\92P\8cê\82ð\8fE\82¤
  18
  19 bool GetWord( const tchar *str, int pos, int &start, int &end, int &prevstart, bool fLongest, int wordcount, bool about, bool alnum, int numPrev )
  20 {
  21         if ( !about )
  22                 if ( !IsWordChar( *(str+pos) ) )
  23                         return false;
  24 //      while ( *(str+pos) == ' ' || *(str+pos) == '\t' ){
  25 //              pos++;
  26 //      }
  27
  28         // \89ü\8ds\82Ì\8fê\8d\87\82Í\96³\82µ
  29         if ( str[pos] == '\r' || str[pos] == '\n' ){
  30                 return false;
  31         }
  32
  33         // pos\82ª\95¶\8e\9a\97ñ\82Ì\8fI\92[\81A\82 \82é\82¢\82Í\95¶\8e\9a\97ñ\82Ì\8fI\92[\82©\82ç\82P\82Â\91O\82Ì\89ü\8ds\82Å\82 \82é\8fê\8d\87\82Í\82¾\82ß //
  34         if ( !str[pos] || (!str[pos+1] && ((tuchar)str[pos] < ' ')) )
  35                 return false;
  36
  37         const tchar *orgp = str;
  38         const tchar *p = orgp;
  39 rescan:
  40         // \92P\8cê\82Ì\90æ\93ª\82ð\92T\82· //
  41         const tchar *wordtop = NULL;
  42         while ( *p ){
  43                 if ( alnum ){
  44                         // \89p\90\94\8e\9a\82Ì\82Ý
  45                         if ( isalphanum(*p) ){
  46                                 // \92P\8cê\82Ì\8en\82Ü\82è
  47                                 wordtop = p;
  48                                 break;
  49                         }
  50                         p = NEXT_CHAR(p);
  51                 } else {
  52                         // \95¶\8e\9a\8eí\82ð\8bæ\95Ê\82µ\82È\82¢
  53                         if ( IsWordChar( *p ) )
  54                         {
  55                                 wordtop = p;
  56                                 break;
  57                         }
  58                         p++;
  59                 }
  60         }
  61         if ( !wordtop )
  62                 wordtop = p;
  63         const tchar *wordtail = NULL;
  64         const tchar *wordprev = NULL;
  65         const tchar *wordprev2 = NULL;
  66         bool fSpc = false;
  67         // pos\82É\82 \82é\92P\8cê\82Ìstart\82Æend\82ð\8b\81\82ß\82é
  68         while ( *p ){
  69                 if ( !IsWordChar( *p ) || (alnum && !isalphanum(*p)) ){
  70                         // \94ñ\92P\8cê\95¶\8e\9a
  71                         if ( orgp + pos < p ){
  72                                 // \92P\8cê\82Ì\8fI\82í\82è
  73                                 wordtail = p;
  74                                 break;
  75                         } else {
  76                                 if ( alnum ){
  77                                         if ( _ismbblead(*p) ){
  78                                                 // \93ú\96{\8cê\82Ì\8fê\8d\87\82Í\81A\92P\8f\83\82È\94ñ\92P\8cê\95¶\8e\9a\82Æ\82Í\82Ý\82È\82³\82¸\81A
  79                                                 // \8dÅ\8f\89\82©\82çscan\82ð\82â\82è\92¼\82·
  80                                                 goto rescan;
  81                                         }
  82                                 }
  83                         }
  84                         fSpc = true;
  85                 } else {
  86                         // \92P\8cê\95¶\8e\9a
  87                         if ( fSpc ){
  88                                 wordprev2 = wordprev;
  89                                 wordprev = wordtop;
  90                                 wordtop = p;
  91                                 fSpc = false;
  92                         }
  93                 }
  94                 if ( alnum ){
  95                         p = NEXT_CHAR( p );
  96                 } else {
  97                         p++;
  98                 }
  99         }
 100
 101         // wordtop\82ªpos\82æ\82è\8cã\82ë
 102         if ( (unsigned)wordtop - (about ? 3 : 0) > (unsigned)(orgp + pos) ){
 103                 // \92P\8cê\82Ì\8bæ\90Ø\82è\82¾\82Á\82½\81A\92P\8cê\82Ì\8dÅ\8f\89\82ªpos\82æ\82è\8cã\82ë\82É\82 \82Á\82½
 104                 // about = true\82Ì\8fê\8d\87\82Í\81A\82R\95¶\8e\9a\95ª\82Ù\82Ç\82¢\82¢\82©\82°\82ñ\8d·\82ð\91«\82µ\82Ä\82¨\82
 105                 return false;
 106         }
 107
 108         if ( fLongest ){
 109 //              int wordcount = 10;     // 10\8cê\82Ü\82Å
 110                 while ( *p && *p != '.' ){
 111                         if ( !IsWordChar( *p ) || (alnum && !isalphanum(*p)) ){
 112                                 if ( !fSpc ){
 113                                         if ( --wordcount == 0 )
 114                                                 break;
 115                                         fSpc = true;
 116                                 }
 117                         } else {
 118                                 fSpc = false;
 119                         }
 120                         if ( alnum ){
 121                                 p = NEXT_CHAR( p );
 122                         } else {
 123                                 p++;
 124                         }
 125                 }
 126                 wordtail = p;
 127         }
 128
 129         if ( !wordtail )
 130                 wordtail = p;
 131         start = STR_DIFF( wordtop, orgp );
 132         end = STR_DIFF( wordtail, orgp );
 133         if ( start == end )
 134                 return false;
 135         if ( numPrev >= 2 && wordprev2 ){
 136                 prevstart = STR_DIFF( wordprev2, orgp );
 137         } else
 138         if ( numPrev >= 1 && wordprev ){
 139                 prevstart = STR_DIFF( wordprev, orgp );
 140         } else {
 141                 prevstart = start;
 142         }
 143         return true;
 144 }
 145 #if 0
 146 bool mbGetWord( const tchar *str, int pos, int &start, int &end, bool fLongest, int wordcount )
 147 {
 148         ushort c;
 149         const tchar *sp = str + pos;
 150         LD_CHAR( c, sp );
 151         if ( !mbIsWordChar( c ) )
 152                 return false;
 153
 154         const tchar *orgp = str;
 155         const tchar *p = orgp;
 156         const tchar *wordtop = p;
 157         const tchar *wordtail = NULL;
 158         bool fSpc = false;
 159         while ( 1 ){
 160                 sp = p;
 161                 LD_CHAR( c, p );
 162                 if ( (tuchar)c == 0x00 )
 163                         break;
 164                 if ( !mbIsWordChar( c ) ){
 165                         if ( orgp + pos < sp ){
 166                                 wordtail = sp;
 167                                 break;
 168                         }
 169                         fSpc = true;
 170                 } else {
 171                         if ( fSpc ){
 172                                 wordtop = sp;
 173                                 fSpc = false;
 174                         }
 175                 }
 176         }
 177         p = sp;
 178         if ( wordtop > orgp + pos ){
 179                 // \92P\8cê\82Ì\8bæ\90Ø\82è\82¾\82Á\82½
 180                 return false;
 181         }
 182
 183         if ( fLongest ){
 184 //              int wordcount = 10;     // 10\8cê\82Ü\82Å
 185                 while ( 1 ){
 186                         sp = p;
 187                         LD_CHAR( c, p );
 188                         if ( (tuchar)c == 0x00 )
 189                                 break;
 190                         if ( c == '.' )
 191                                 break;
 192                         if ( !mbIsWordChar( c ) ){
 193                                 if ( !fSpc ){
 194                                         if ( --wordcount == 0 )
 195                                                 break;
 196                                         fSpc = true;
 197                                 }
 198                         } else {
 199                                 fSpc = false;
 200                         }
 201                 }
 202                 wordtail = sp;
 203         }
 204
 205         if ( !wordtail )
 206                 wordtail = sp;
 207         start = STR_DIFF( wordtop, orgp );
 208         end = STR_DIFF( wordtail, orgp );
 209         return true;
 210 }
 211 bool mbIsWordChar( unsigned short c )
 212 {
 213         if ( c < 0x100 ){
 214                 // \94¼\8ap\95¶\8e\9a
 215                 if ( isalphanum( c ) || c == '-' || c == '\'' ||
 216                          (c >= 0x86 && c <= 0xBF ) )
 217                         return true;
 218                 else
 219                         return false;
 220         }
 221         // \91S\8ap\95¶\8e\9a
 222         if (
 223                 // \89p\90\94\8e\9a\81A\83J\83^\83J\83i\81A\82Ð\82ç\82ª\82È\81A\83M\83\8a\83V\83\83\81A\83\8d\83V\83A
 224                 (c >= 0x824f && c <= 0x8491) ||
 225                 // \8a¿\8e\9a
 226                 (c >= 0x889F) ||
 227                 // \83A\83|\83X\83g\83\8d\83t\83B
 228                 (c == CODE_APOSTROPHE)
 229                 )
 230                 return true;
 231         else
 232                 return false;
 233 }
 234 #endif
 235 //\89p\90\94\8e\9a\82Ì\94»\95Ê
 236 int isalphanum( tuchar c )
 237 {
 238         if ( c >= _t('A') && c <= _t('Z') ){
 239                 return 1;
 240         }
 241         if ( c >= _t('a') && c <= _t('z') ){
 242                 return 1;
 243         }
 244         if ( c >= _t('0') && c <= _t('9') ){
 245                 return 1;
 246         }
 247         return 0;
 248 }
 249