OSDN Git Service

BOOL→bool
[dokopop/dokopop.git] / DCHookTest / StringLib.cpp
1 //---------------------------------------------------------------------------
2 #include <vcl.h>
3 #pragma hdrstop
4
5 #include "StringLib.h"
6
7 //---------------------------------------------------------------------------
8 #pragma package(smart_init)
9
10 // pos\82©\82ç\92P\8cê\82ð\90Ø\82è\8fo\82·
11 // pos\82Ístr\82©\82ç\82Ì\83I\83t\83Z\83b\83g
12 // start,end\82Ístr\82©\82ç\82Ì\83I\83t\83Z\83b\83g\82ð\95Ô\82·
13 // prevstart\82Í\81Astart\82Ì\82P\82Â\91O\82Ì\92P\8cê\81A\96³\82¢\8fê\8d\87\82Ístart\82Æ\93¯\82\92l
14 // fLongest\82ªTRUE\82Ì\8fê\8d\87\82Í\81A\8dÅ\8d\8210\8cê\82Ü\82Åget\82·\82é(.)\82ð\8fI\82í\82è\82Æ\82Ý\82È\82·
15 // about   : \82¢\82¢\89Á\8c¸\82È\82Æ\82±\82ë\82ð\83N\83\8a\83b\83N\82µ\82Ä\82à\92P\8cê\82³\82¦\82 \82ê\82Î\95K\82¸get\82·\82é
16 // alnum   : \89p\90\94\8e\9a\88È\8aO\82Í\96³\8e\8b
17
18 bool GetWord( const tchar *str, int pos, int &start, int &end, int &prevstart, bool fLongest, int wordcount, bool about, bool alnum )
19 {
20         if ( !about )
21                 if ( !IsWordChar( *(str+pos) ) )
22                         return false;
23 //      while ( *(str+pos) == ' ' || *(str+pos) == '\t' ){
24 //              pos++;
25 //      }
26
27         // \89ü\8ds\82Ì\8fê\8d\87\82Í\96³\82µ
28         if ( str[pos] == '\r' || str[pos] == '\n' ){
29                 return false;
30         }
31
32         // pos\82ª\95\8e\9a\97ñ\82Ì\8fI\92[\81A\82 \82é\82¢\82Í\95\8e\9a\97ñ\82Ì\8fI\92[\82©\82ç\82P\82Â\91O\82Ì\89ü\8ds\82Å\82 \82é\8fê\8d\87\82Í\82¾\82ß //
33         if ( !str[pos] || (!str[pos+1] && ((tuchar)str[pos] < ' ')) )
34                 return false; 
35
36         const tchar *orgp = str;
37         const tchar *p = orgp;
38 rescan:
39         // \92P\8cê\82Ì\90æ\93ª\82ð\92T\82· //
40         const tchar *wordtop = NULL;
41         while ( *p ){
42                 if ( alnum ){
43                         // \89p\90\94\8e\9a\82Ì\82Ý
44                         if ( isalphanum(*p) ){
45                                 // \92P\8cê\82Ì\8en\82Ü\82è
46                                 wordtop = p;
47                                 break;
48                         }
49                         p = NEXT_CHAR(p);
50                 } else {
51                         // \95\8e\9a\8eí\82ð\8bæ\95Ê\82µ\82È\82¢
52                         if ( IsWordChar( *p ) )
53                         {
54                                 wordtop = p;
55                                 break;
56                         }
57                         p++;
58                 }
59         }
60         if ( !wordtop )
61                 wordtop = p; 
62         const tchar *wordtail = NULL;
63         const tchar *wordprev = NULL;
64         bool fSpc = false;
65         // pos\82É\82 \82é\92P\8cê\82Ìstart\82Æend\82ð\8b\81\82ß\82é
66         while ( *p ){
67                 if ( !IsWordChar( *p ) || (alnum && !isalphanum(*p)) ){
68                         // \94ñ\92P\8cê\95\8e\9a
69                         if ( orgp + pos < p ){
70                                 // \92P\8cê\82Ì\8fI\82í\82è
71                                 wordtail = p;
72                                 break;
73                         } else {
74                                 if ( alnum ){
75                                         if ( _ismbblead(*p) ){
76                                                 // \93ú\96{\8cê\82Ì\8fê\8d\87\82Í\81A\92P\8f\83\82È\94ñ\92P\8cê\95\8e\9a\82Æ\82Í\82Ý\82È\82³\82¸\81A
77                                                 // \8dÅ\8f\89\82©\82çscan\82ð\82â\82è\92¼\82·
78                                                 goto rescan;
79                                         }
80                                 }
81                         }
82                         fSpc = true;
83                 } else {
84                         // \92P\8cê\95\8e\9a
85                         if ( fSpc ){
86                                 wordprev = wordtop;
87                                 wordtop = p;
88                                 fSpc = FALSE;
89                         }
90                 }
91                 if ( alnum ){
92                         p = NEXT_CHAR( p );
93                 } else {
94                         p++;
95                 }
96         }
97
98         // wordtop\82ªpos\82æ\82è\8cã\82ë
99         if ( (unsigned)wordtop - (about ? 3 : 0) > (unsigned)(orgp + pos) ){
100                 // \92P\8cê\82Ì\8bæ\90Ø\82è\82¾\82Á\82½\81A\92P\8cê\82Ì\8dÅ\8f\89\82ªpos\82æ\82è\8cã\82ë\82É\82 \82Á\82½
101                 // about = true\82Ì\8fê\8d\87\82Í\81A\82R\95\8e\9a\95ª\82Ù\82Ç\82¢\82¢\82©\82°\82ñ\8d·\82ð\91«\82µ\82Ä\82¨\82­
102                 return false;
103         }
104
105         if ( fLongest ){
106 //              int wordcount = 10;     // 10\8cê\82Ü\82Å
107                 while ( *p && *p != '.' ){
108                         if ( !IsWordChar( *p ) || (alnum && !isalphanum(*p)) ){
109                                 if ( !fSpc ){
110                                         if ( --wordcount == 0 )
111                                                 break;
112                                         fSpc = true;
113                                 }
114                         } else {
115                                 fSpc = FALSE;
116                         }
117                         if ( alnum ){
118                                 p = NEXT_CHAR( p );
119                         } else {
120                                 p++;
121                         }
122                 }
123                 wordtail = p;
124         }
125
126         if ( !wordtail )
127                 wordtail = p;
128         start = STR_DIFF( wordtop, orgp );
129         end = STR_DIFF( wordtail, orgp );
130         if ( start == end )
131                 return false;
132         if ( wordprev ){
133                 prevstart = STR_DIFF( wordprev, orgp );
134         } else {
135                 prevstart = start;
136         }
137         return true;
138 }
139 #if 0
140 bool mbGetWord( const tchar *str, int pos, int &start, int &end, bool fLongest, int wordcount )
141 {
142         ushort c;
143         const tchar *sp = str + pos;
144         LD_CHAR( c, sp );
145         if ( !mbIsWordChar( c ) )
146                 return false;
147
148         const tchar *orgp = str;
149         const tchar *p = orgp;
150         const tchar *wordtop = p;
151         const tchar *wordtail = NULL;
152         bool fSpc = false;
153         while ( 1 ){
154                 sp = p;
155                 LD_CHAR( c, p );
156                 if ( (tuchar)c == 0x00 )
157                         break;
158                 if ( !mbIsWordChar( c ) ){
159                         if ( orgp + pos < sp ){
160                                 wordtail = sp;
161                                 break;
162                         }
163                         fSpc = true;
164                 } else {
165                         if ( fSpc ){
166                                 wordtop = sp;
167                                 fSpc = false;
168                         }
169                 }
170         }
171         p = sp;
172         if ( wordtop > orgp + pos ){
173                 // \92P\8cê\82Ì\8bæ\90Ø\82è\82¾\82Á\82½
174                 return false;
175         }
176
177         if ( fLongest ){
178 //              int wordcount = 10;     // 10\8cê\82Ü\82Å
179                 while ( 1 ){
180                         sp = p;
181                         LD_CHAR( c, p );
182                         if ( (tuchar)c == 0x00 )
183                                 break;
184                         if ( c == '.' )
185                                 break;
186                         if ( !mbIsWordChar( c ) ){
187                                 if ( !fSpc ){
188                                         if ( --wordcount == 0 )
189                                                 break;
190                                         fSpc = true;
191                                 }
192                         } else {
193                                 fSpc = false;
194                         }
195                 }
196                 wordtail = sp;
197         }
198
199         if ( !wordtail )
200                 wordtail = sp;
201         start = STR_DIFF( wordtop, orgp );
202         end = STR_DIFF( wordtail, orgp );
203         return true;
204 }
205 bool mbIsWordChar( unsigned short c )
206 {
207         if ( c < 0x100 ){
208                 // \94¼\8ap\95\8e\9a
209                 if ( isalphanum( c ) || c == '-' || c == '\'' ||
210                          (c >= 0x86 && c <= 0xBF ) )
211                         return true;
212                 else
213                         return false;
214         }
215         // \91S\8ap\95\8e\9a
216         if (
217                 // \89p\90\94\8e\9a\81A\83J\83^\83J\83i\81A\82Ð\82ç\82ª\82È\81A\83M\83\8a\83V\83\83\81A\83\8d\83V\83A
218                 (c >= 0x824f && c <= 0x8491) ||
219                 // \8a¿\8e\9a
220                 (c >= 0x889F) ||
221                 // \83A\83|\83X\83g\83\8d\83t\83B
222                 (c == CODE_APOSTROPHE)
223                 )
224                 return true;
225         else
226                 return false;
227 }
228 #endif
229 //\89p\90\94\8e\9a\82Ì\94»\95Ê
230 int isalphanum( tuchar c )
231 {
232         if ( c >= _t('A') && c <= _t('Z') ){
233                 return 1;
234         }
235         if ( c >= _t('a') && c <= _t('z') ){
236                 return 1;
237         }
238         if ( c >= _t('0') && c <= _t('9') ){
239                 return 1;
240         }
241         return 0;
242 }
243