1 ///////////////////////////////////////////////////////////////////////////
4 // Updated: 19-Jul-1998
6 // Copyright: Marko Macek
7 // E-mail: Marko.Macek@gmx.net or mark@hermes.si
9 // Some handy stuff to deal with regular expressions
11 // You are free to use or modify this code to the following restrictions:
12 // - Acknowledge me somewhere in your about box, simple "Parts of code by.."
13 // will be enough. If you can't (or don't want to), contact me personally.
14 // - LEAVE THIS HEADER INTACT
15 ////////////////////////////////////////////////////////////////////////////
24 #include <Poco/RegularExpression.h>
25 #include <Poco/UnicodeConverter.h>
29 using Poco::RegularExpression;
30 using Poco::UnicodeConverter;
// Owned, compiled Poco regular expression. RxCompile installs it through
// n->regexp.reset(...) and RxExec runs it through Regexp->regexp->match(...).
33 std::unique_ptr<RegularExpression> regexp;
// Compiles the pattern text Regexp into an RxNode whose regexp member holds a
// Poco::RegularExpression built from the pattern and the collected options.
//   Regexp - pattern text; nullptr yields a nullptr result.
//   RxOpt  - option bits; when RX_CASE is NOT set, RE_CASELESS is added, so
//            matching is case-insensitive unless the caller asks for case.
// Returns nullptr when Regexp or the node n is nullptr.
36 RxNode *RxCompile(LPCTSTR Regexp, unsigned int RxOpt) {
38 if (Regexp == nullptr) return nullptr;
// NOTE(review): the declaration/allocation of n is not part of this view;
// presumably it is the freshly allocated RxNode - confirm.
40 if (n == nullptr) return nullptr;
42 const char * errormsg = nullptr;
45 std::string regexString;
// The pattern is converted to UTF-8 and RE_UTF8 is requested so that pattern
// and subject are treated as UTF-8 by the regex engine.
// NOTE(review): presumably this pair is only compiled for wide-character
// (UNICODE) builds - confirm against the surrounding preprocessor conditionals.
47 pcre_opts |= RegularExpression::RE_UTF8;
48 UnicodeConverter::toUTF8(Regexp, regexString);
53 // pcre_opts |= PCRE_BSR_ANYCRLF;
54 if ((RxOpt & RX_CASE) == 0)
55 pcre_opts |= RegularExpression::RE_CASELESS;
// NOTE(review): Poco's RegularExpression constructor throws
// RegularExpressionException for a pattern that cannot be compiled; confirm
// the surrounding code catches it and releases n.
59 n->regexp.reset(new RegularExpression(regexString, pcre_opts));
// Takes an RxNode pointer and returns nothing.
// NOTE(review): presumably the release counterpart of RxCompile - confirm.
69 void RxFree(RxNode *n) {
// Runs the compiled expression in Regexp against the text Data, starting the
// search at Start, and records the positions of the matches in Match.
//   Regexp - compiled node; nullptr makes the function return 0.
//   Data   - beginning of the subject text.
//   Len    - length of the subject text passed to the UTF-8 conversion.
//   Start  - position inside Data where matching begins (Start - Data is the
//            start offset).
//   Match  - receives Open[i]/Close[i] for each reported match i; every slot
//            up to NSEXPS is first reset to -1.
76 int RxExec(RxNode *Regexp, LPCTSTR Data, size_t Len, LPCTSTR Start, RxMatchRes *Match) {
77 if (Regexp == nullptr) return 0;
// Mark every (sub)match slot as "not matched" before running the expression.
80 for (i = 0; i < NSEXPS; i++) Match->Open[i] = Match->Close[i] = -1;
82 RegularExpression::MatchVec ovector;
83 std::string compString;
// NOTE(review): startoffset is declared twice below (size_t and int), so the
// two variants are presumably alternatives chosen by a preprocessor
// conditional (likely UNICODE vs. non-UNICODE) - confirm.
// Variant 1: the subject is converted to UTF-8; the start offset is derived
// via ucr::Utf8len_of_string (presumably the UTF-8 length of the first
// Start - Data characters - confirm).
85 size_t startoffset = ucr::Utf8len_of_string(Data, Start - Data);
86 UnicodeConverter::toUTF8(Data, Len, compString);
// Variant 2: the start offset is the plain pointer difference.
88 int startoffset = Start - Data;
93 result = Regexp->regexp->match(compString, startoffset, ovector);
// NOTE(review): i runs up to result; if result can exceed NSEXPS the writes
// to Match->Open/Close below would go out of bounds - confirm a bound exists.
100 for (i = 0; i < result; i++)
// Variant 1: offsets reported against the UTF-8 string are mapped back via
// ucr::stringlen_of_utf8 (presumably to character offsets in Data - confirm).
103 Match->Open[i] = ucr::stringlen_of_utf8(compString.c_str(), ovector[i].offset);
104 Match->Close[i] = ucr::stringlen_of_utf8(compString.c_str(), ovector[i].offset + ovector[i].length);
// Variant 2: offsets are stored unchanged; Close is offset + length.
106 Match->Open[i] = ovector[i].offset;
107 Match->Close[i] = ovector[i].offset + ovector[i].length;
// Case-conversion state bits carried in the flag argument of add().
// The *_CASE bits are applied by add() to every character it appends and stay
// set until RxReplace clears them (its 'e' escape clears both).
// The *_NEXT bits are applied by add() to the first appended character only
// and are cleared by add() right after being applied.
115 #define FLAG_UP_CASE 1
116 #define FLAG_DOWN_CASE 2
117 #define FLAG_UP_NEXT 4
118 #define FLAG_DOWN_NEXT 8
// Appends alen characters from a to the heap buffer *s and applies the
// pending case conversions to the appended characters.
//   len  - in/out: current length of *s in characters.
//   s    - in/out: heap buffer, grown with realloc or created with malloc.
//   a    - characters to append.
//   alen - number of characters to append.
//   flag - in/out: FLAG_* bits; FLAG_UP_NEXT / FLAG_DOWN_NEXT are cleared
//          here once they have been applied.
120 static int add(size_t *len, LPTSTR *s, LPCTSTR a, size_t alen, int &flag) {
121 size_t NewLen = *len + alen;
// Existing buffer: grow it. The realloc result goes into a temporary first.
// NOTE(review): presumably so *s is not lost if realloc fails - confirm.
130 LPTSTR p = (LPTSTR) realloc(*s, NewLen * sizeof(TCHAR));
135 memcpy(*s + *len, a, alen * sizeof(TCHAR));
// No buffer yet: allocate one and copy the data to its beginning.
137 *s = (LPTSTR) malloc(NewLen * sizeof(TCHAR));
139 memcpy(*s, a, alen * sizeof(TCHAR));
// Whole-run conversion: p starts at *s + *len, i.e. at the first appended
// character, and the loop visits alen characters.
142 if (flag & FLAG_UP_CASE) {
143 LPTSTR p = *s + *len;
145 for (i = 0; i < alen; i++) {
146 *p = (TCHAR)_totupper(*p);
149 } else if (flag & FLAG_DOWN_CASE) {
150 LPTSTR p = *s + *len;
152 for (i = 0; i < alen; i++) {
153 *p = (TCHAR)_totlower(*p);
// One-shot conversion: only the first appended character is converted and the
// corresponding *_NEXT bit is cleared.
// NOTE(review): with alen == 0 this would touch a character past the appended
// data unless a guard exists on a line not shown here - confirm.
157 if (flag & FLAG_UP_NEXT) {
158 LPTSTR p = *s + *len;
160 *p = (TCHAR)_totupper(*p);
161 flag &= ~FLAG_UP_NEXT;
162 } else if (flag & FLAG_DOWN_NEXT) {
163 LPTSTR p = *s + *len;
165 *p = (TCHAR)_totlower(*p);
166 flag &= ~FLAG_DOWN_NEXT;
// Expands the replacement template rep for one match and builds the result in
// a heap buffer through add().
//   rep   - replacement template, consumed character by character.
//   Src   - text that the offsets in match refer to.
//   match - Open[n]/Close[n] offsets of the match and its groups (-1 = unset).
//   Dest  - presumably receives the result buffer (assignment not shown here).
//   Dlen  - receives the result length in characters, narrowed to int.
// Escapes handled by the inner switch:
//   1..9           insert group n when both Open[n] and Close[n] are set
//   r n b a t      insert CR, LF, backspace, bell, tab
//   U / L          turn on upper / lower casing of everything appended
//   u / l          upper / lower case the next appended character only
//   e              turn off U and L casing
//   numeric forms  two hex digits, three decimal digits or three octal digits
//                  (their case labels are not visible in this view)
// Returns 0 on a malformed numeric escape.
// NOTE(review): the return value of add() is ignored at every call site.
172 int RxReplace(LPCTSTR rep, LPCTSTR Src, int /*len*/, RxMatchRes match, LPTSTR *Dest, int *Dlen) {
181 // add(&dlen, &dest, Src, match.Open[0]);
// Outer dispatch on the current template character.
183 switch (Ch = *rep++) {
185 // add(&dlen, &dest, Src + match.Open[0], match.Close[0] - match.Open[0], flag);
// Inner dispatch on the character that follows the escape introducer.
// NOTE(review): the introducer's case label is not shown; presumably '\\'.
188 switch (Ch = *rep++) {
190 case _T('1'): case _T('2'): case _T('3'):
191 case _T('4'): case _T('5'): case _T('6'):
192 case _T('7'): case _T('8'): case _T('9'):
// Copy the text of group n only when the group took part in the match.
195 if (match.Open[n] != -1 && match.Close[n] != -1) {
196 add(&dlen, &dest, Src + match.Open[n], match.Close[n] - match.Open[n], flag);
200 if (dest) free(dest);
202 case _T('r'): Ch = _T('\r'); add(&dlen, &dest, &Ch, 1, flag); break;
203 case _T('n'): Ch = _T('\n'); add(&dlen, &dest, &Ch, 1, flag); break;
204 case _T('b'): Ch = _T('\b'); add(&dlen, &dest, &Ch, 1, flag); break;
205 case _T('a'): Ch = _T('\a'); add(&dlen, &dest, &Ch, 1, flag); break;
206 case _T('t'): Ch = _T('\t'); add(&dlen, &dest, &Ch, 1, flag); break;
207 case _T('U'): flag |= FLAG_UP_CASE; break;
208 case _T('u'): flag |= FLAG_UP_NEXT; break;
209 case _T('L'): flag |= FLAG_DOWN_CASE; break;
210 case _T('l'): flag |= FLAG_DOWN_NEXT; break;
212 case _T('e'): flag &= ~(FLAG_UP_CASE | FLAG_DOWN_CASE); break;
// Two hexadecimal digits: '0'..'9' map to 0..9, 'A'..'F' (after upper-casing)
// map to 10..15.
// NOTE(review): the early "return 0" exits here and in the decimal/octal
// parsers below do not free dest, unlike the error path that calls free(dest)
// above - likely a leak when text was already appended; confirm.
// NOTE(review): the characters between '9' and 'A' (':' .. '@') give N in
// 10..16, which the "N > 9" adjustment turns into 3..9, so they are accepted
// as digits; if N is signed, characters below '0' also pass - confirm.
218 if (*rep == 0) return 0;
219 N = _totupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
222 if (*rep == 0) return 0;
223 N = _totupper(*rep) - 48; if (N > 9) N = N + 48 - 65 + 10; if (N > 15) return 0;
228 add(&dlen, &dest, &Ch, 1, flag);
// Three decimal digits (each must be 0..9).
235 if (*rep == 0) return 0;
236 N = _totupper(*rep) - 48; if (N > 9) return 0;
239 if (*rep == 0) return 0;
240 N = _totupper(*rep) - 48; if (N > 9) return 0;
243 if (*rep == 0) return 0;
244 N = _totupper(*rep) - 48; if (N > 9) return 0;
249 add(&dlen, &dest, &Ch, 1, flag);
// Three octal digits (each must be 0..7).
256 if (*rep == 0) return 0;
257 N = _totupper(*rep) - 48; if (N > 7) return 0;
260 if (*rep == 0) return 0;
261 N = _totupper(*rep) - 48; if (N > 7) return 0;
264 if (*rep == 0) return 0;
265 N = _totupper(*rep) - 48; if (N > 7) return 0;
270 add(&dlen, &dest, &Ch, 1, flag);
273 add(&dlen, &dest, &Ch, 1, flag);
278 add(&dlen, &dest, &Ch, 1, flag);
282 // add(&dlen, &dest, Src + match.Close[0], len - match.Close[0]);
// dlen is a size_t while *Dlen is an int; the assertion guards the narrowing.
283 ASSERT(dlen < INT_MAX);
284 *Dlen = static_cast<int>(dlen);