1 #include "util_string.h"
2 #include "util_const.h"
3 #include "util_random.h"
7 #include <boost/regex.hpp>
17 ustring c3 (const ustring& str) {
19 static uregex re ("^[0-9]+");
25 if (str[0] == '-' || str[0] == '+') {
29 if (usearch (b, e, m, re)) {
30 int n = m[0].second - m[0].first;
31 int l = str.size () + n / 3;
36 ans.append (1, str[0]);
38 for (; b != m[0].second; b ++) {
40 if (n > 1 && n % 3 == 1) {
41 ans.append (CharConst (","));
45 for (; b != e; b ++) {
54 static int hex (char c) {
55 if ('0' <= c && c <= '9') {
57 } else if ('a' <= c && c <= 'f') {
58 return (c - 'a' + 10);
59 } else if ('A' <= c && c <= 'F') {
60 return (c - 'A' + 10);
// Combine two hexadecimal digit characters into a single integer value.
// c1 supplies the high nibble (multiplied by 16) and c2 the low nibble; each
// character is converted by the single-character hex() overload.
66 static int hex (char c1, char c2) {
67 return (hex (c1) * 16 + hex (c2));
// Map an integer to a character; the visible callers pass a nibble
// (value masked with 0x0f) and store the result as a hex digit.
// NOTE(review): only part of this function is visible in this excerpt.
//
// Fix: the range test used to be the chained comparison `10 <= c <= 15`.
// C++ parses that as `(10 <= c) <= 15`, i.e. a bool (0 or 1) compared with
// 15, which is always true, so the branch accepted every value that reached
// it. The test now checks both bounds explicitly.
70 static char hexchar (int c) {
73 else if (10 <= c && c <= 15)
79 static ustring percentHex (int c) {
82 ans[1] = hexchar ((c >> 4) & 0x0f);
83 ans[2] = hexchar (c & 0x0f);
87 ustring urldecode_nonul (const ustring& str) {
89 static uregex re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
93 ans.reserve (str.size ());
96 while (usearch (b, e, m, re)) {
97 if (b != m[0].first) {
98 ans.append (b, m[0].first);
102 } else if (m[2].matched) {
103 int v = hex (*(m[2].first), *(m[2].first + 1));
117 static ustring omitPattern (const ustring& text, uregex& re) {
118 Splitter sp (text, re);
123 ans.reserve (text.length ());
124 if (sp.begin () != sp.end ())
125 ans.append (sp.begin (), sp.end ());
127 if (sp.begin () != sp.end ())
128 ans.append (sp.begin (), sp.end ());
// Run omitPattern() over str with a pattern that matches runs of control
// characters: bytes 0x00-0x1f and 0x7f.
// The regex is a function-local static, so it is constructed once and reused
// on later calls.
// NOTE(review): the result is whatever omitPattern() produces for the matched
// runs (presumably they are dropped) - confirm against omitPattern().
139 ustring omitCtrl (const ustring& str) {
140 static uregex re ("[\\x00-\\x1f\\x7f]+");
141 return omitPattern (str, re);
// Run omitPattern() over str using re_nl, a regex defined outside this
// function.
// NOTE(review): re_nl presumably matches newline sequences - confirm at its
// definition.
144 ustring omitNL (const ustring& str) {
145 return omitPattern (str, re_nl);
// Run omitPattern() over str with a pattern that matches runs of characters
// outside the range ' ' (0x20) through '~' (0x7e). Spaces are inside the
// accepted range and therefore never matched.
// The regex is a function-local static, constructed once and reused.
148 ustring omitNonAscii (const ustring& str) {
149 static uregex re ("[^ -\\x7e]+");
150 return omitPattern (str, re);
// Run omitPattern() over str with a pattern that matches runs of characters
// outside the range '!' (0x21) through '~' (0x7e). Unlike omitNonAscii(),
// the space character (0x20) is outside the accepted range and is matched.
// The regex is a function-local static, constructed once and reused.
153 ustring omitNonAsciiWord (const ustring& str) {
154 static uregex re ("[^\\x21-\\x7e]+");
155 return omitPattern (str, re);
158 bool to_bool (const ustring& v) {
159 if (v.length () == 0 || (v.length () == 1 && v[0] == '0')) {
166 static ustring percentEncode (const ustring& text, uregex& re) {
176 if (b != e && usearch (b, e, m, re)) {
177 if (b != m[0].first) {
178 ans.append (ustring (b, m[0].first));
181 ans.append (uUScore);
182 } else if (m[2].matched) {
183 ans.append (percentHex (*m[2].first));
188 while (b != e && usearch (b, e, m, re)) {
189 if (b != m[0].first) {
190 ans.append (ustring (b, m[0].first));
193 ans.append (uUScore);
194 } else if (m[2].matched) {
195 ans.append (percentHex (*m[2].first));
202 ans.append (ustring (b, e));
210 ustring urlencode (const ustring& url) {
211 static uregex re ("(\\x00)|([^a-zA-Z0-9_.,/-])");
213 return percentEncode (url, re);
216 ustring cookieencode (const ustring& text) {
217 static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
219 return percentEncode (text, re);
222 ustring cookiedecode (const ustring& text) {
227 static uregex re ("%([0-9a-fA-F])([0-9a-fA-F])");
231 while (usearch (b, e, m, re)) {
233 ans.append (ustring (b, m[0].first));
234 a = hex (*m[1].first, *m[2].first);
239 ans.append (ustring (b, e));
244 ustring clipColon (const ustring& text) {
248 for (i = 0; i < ans.size (); i ++) {
255 ustring dirPart (char* path) {
256 char* e = rindex (path, '/');
258 if (e && e != path) {
259 return ustring (path, e - path);
265 ustring dirPart (const ustring& path) {
266 ustring::size_type s = path.rfind ('/', path.size ());
268 if (s == ustring::npos) {
271 return ustring (path.begin (), path.begin () + s);
275 ustring filePart_osSafe (const ustring& path) {
277 static uregex re ("[^\\\\/]+$");
279 if (usearch (path, m, re)) {
280 return ustring (m[0].first, m[0].second);
286 void split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
287 Splitter sp (b, e, re);
290 ans.push_back (sp.cur ());
294 bool splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
295 for (; b < e; b ++) {
305 static char Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
306 ustring base64Encode (uiterator b, uiterator e) {
317 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
318 ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
319 ans.append (1, Base64Char[((c1 & 0x0f) << 2) | ((c2 >> 6) & 0x03)]);
320 ans.append (1, Base64Char[c2 & 0x3f]);
321 } else if (size == 2) {
324 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
325 ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
326 ans.append (1, Base64Char[((c1 & 0x0f) << 2)]);
328 } else if (size == 1) {
330 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
331 ans.append (1, Base64Char[((c0 & 0x03) << 4)]);
341 ustring escape_re (const ustring& text) {
342 ustring::const_iterator b, e;
347 static uregex re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
351 ans.reserve (text.size () + 16);
354 while (b != e && usearch (b, e, m, re)) {
356 ans.append (b, m[0].first);
358 buf[2] = hexchar ((c >> 4) & 0x0f);
359 buf[3] = hexchar (c & 0x0f);
368 ustring slashEncode (const ustring& text) {
369 ustring::const_iterator b, e;
374 static uregex re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
380 while (b != e && usearch (b, e, m, re)) {
382 ans.append (b, m[0].first);
387 ans.append (CharConst ("\\t"));
390 ans.append (CharConst ("\\r"));
393 ans.append (CharConst ("\\n"));
396 buf[2] = hexchar ((c >> 4) & 0x0f);
397 buf[3] = hexchar (c & 0x0f);
400 } else if (m[2].matched) {
401 ans.append (CharConst ("\\\\"));
402 } else if (m[3].matched) {
403 ans.append (CharConst ("\\\""));
414 ustring slashDecode (const ustring& text) {
415 ustring::const_iterator b, e;
419 static uregex re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
423 while (b != e && usearch (b, e, m, re)) {
425 ans.append (b, m[0].first);
430 ans.append (CharConst ("\t"));
433 ans.append (CharConst ("\r"));
436 ans.append (CharConst ("\n"));
439 if (m[0].second - m[0].first == 4) {
445 if (0 < c && c < 0x20)
// Parse str as a base-10 unsigned long by forwarding to the C-string
// strtoul() overload.
// No end pointer is requested (NULL), so the caller cannot tell how much of
// the text was consumed or whether any digits were found.
458 unsigned long strtoul (const ustring& str) {
459 return strtoul (str.c_str (), NULL, 10);
// Parse a base-10 unsigned long starting at the character b points to, by
// passing the address of *b to the C-string strtoul() overload.
// NOTE(review): no end iterator is supplied, so parsing stops only where the
// C function stops. This assumes b is dereferenceable (not an end iterator)
// and that the underlying buffer is NUL-terminated - confirm at call sites.
462 unsigned long strtoul (const uiterator& b) {
463 return strtoul (&*b, NULL, 10);
// Parse str as a base-10 signed long by forwarding to the C-string strtol()
// overload.
// No end pointer is requested (NULL), so the caller cannot tell how much of
// the text was consumed or whether any digits were found.
466 long strtol (const ustring& str) {
467 return strtol (str.c_str (), NULL, 10);
// Parse str as a double by forwarding to the C-string strtod() overload.
// No end pointer is requested (NULL), so the caller cannot tell how much of
// the text was consumed or whether a number was found.
470 double strtod (const ustring& str) {
471 return strtod (str.c_str (), NULL);
// Check a plain-text password against a stored crypt(3) hash.
//   pass  - candidate plain-text password.
//   cpass - stored hash; it is also passed to crypt() as the salt/setting.
// Returns true when hashing pass with cpass's setting reproduces cpass.
// Empty pass or cpass is handled by the guard below before crypt() is called.
//
// Fix: crypt() returns NULL on failure (e.g. an unsupported or malformed
// setting string). The result used to be handed straight to strcmp(), which
// is undefined behaviour for a NULL argument; a failed hash now simply does
// not match.
474 bool passMatch (const ustring& pass, const ustring& cpass) {
475 if (pass.length () == 0 || cpass.length () == 0)
477 const char* hashed = crypt (pass.c_str (), cpass.c_str ());
return (hashed != NULL && strcmp (hashed, cpass.c_str ()) == 0);
// Hash a plain-text password with crypt(3) using a salt from makeSalt().
//   pass - plain-text password.
// Returns the hash string produced by crypt(), or an empty ustring when
// crypt() fails.
//
// Fix: crypt() returns NULL on failure, and constructing a string from a NULL
// pointer is undefined behaviour. The NULL case is now mapped to an empty
// result instead.
// NOTE(review): callers that store the result should treat an empty string as
// "hashing failed" - confirm this is acceptable at the call sites.
480 ustring passCrypt (const ustring& pass) {
481 ustring salt = makeSalt ();
482 const char* hashed = crypt (pass.c_str (), salt.c_str ());
return (hashed != NULL) ? ustring (hashed) : ustring ();
485 size_t strLength (const ustring& src) {
497 void substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
503 for (i = 0; i < idx && b < e; i ++)
507 for (i = 0; i < len && t < e; i ++)
515 ustring utf16Encode (const ustring& str) {
522 ans.reserve (u.size () * 3);
525 for (i = 0; i < u.size (); i += 2) {
527 b[2] = hexchar ((c >> 4) & 0x0f);
528 b[3] = hexchar (c & 0x0f);
530 b[4] = hexchar ((c >> 4) & 0x0f);
531 b[5] = hexchar (c & 0x0f);
537 ustring filenameEncode (const ustring& text) {
538 static uregex re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
539 Splitter sp (text, re);
543 if (text.length () == 0) {
544 throw (ustring (text).append (uErrorBadName));
546 ans.reserve (text.length () + 16);
548 if (sp.begin () < sp.end ())
549 ans.append (sp.begin (), sp.end ());
551 } else if (sp.match (2)) {
552 c = *sp.matchBegin (2);
554 ans.append (1, hexchar ((c >> 4) & 0x0f));
555 ans.append (1, hexchar (c & 0x0f));
556 } else if (sp.match (3)) {
557 for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
558 ans.append (CharConst (":2e"));
562 if (ans.length () > 250)
567 bool matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
568 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
576 bool matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
577 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
584 bool matchHead (const ustring& str, const char* t, size_t s) {
585 if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
592 bool matchHead (const ustring& str, const ustring& head) {
593 if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
600 bool match (uiterator b, uiterator e, const char* t, size_t s) {
601 if (e - b == s && memcmp (t, &b[0], s) == 0) {
608 bool match (const ustring& str, const char* t, size_t s) {
609 if (str.length () == s && memcmp (t, str.data (), s) == 0) {
616 bool match (uiterator b, uiterator e, const ustring& str) {
617 if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
624 bool match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
625 if (match (str, t, s) || match (str, t2, s2)) {
// Build a ustring from the range [b, e) after trimming; the visible part of
// the body tests the last character of the range with isblank() and finally
// returns ustring (b, e).
// NOTE(review): the remaining trimming logic is outside this excerpt.
//
// Fix: isblank() requires an argument representable as unsigned char (or
// EOF). Passing a plain char holding a byte >= 0x80 yields a negative int on
// platforms where char is signed, which is undefined behaviour. The character
// is now converted to unsigned char first.
632 ustring clipWhite (uiterator b, uiterator e) {
640 if (isblank (static_cast<unsigned char> (*(e - 1)))) {
645 return ustring (b, e);
// Convenience overload: applies the iterator-range clipWhite() to the whole
// of str (str.begin () to str.end ()) and returns its result.
647 ustring clipWhite (const ustring& str) {
648 return clipWhite (str.begin (), str.end ());
651 ustring getenvString (const char* key) {
652 char* e = getenv (key);
660 ustring zeroPad (int n, const ustring& src) {
663 n = std::min (32, n);
664 m = n - src.length ();
// Wide-character regex search.
//   text         - subject text; converted to std::wstring with utow().
//   m            - receives the match results.
//   reg          - pattern text; converted to std::wstring with utow().
//   reg_flags    - flags used to construct the boost::wregex.
//   search_flags - flags passed to regex_search().
// Returns the result of regex_search().
// The regex is constructed from reg on every call.
// NOTE(review): regex_search() runs on wtext, a local wide copy of text, so
// the sub-match iterators stored in m refer to wtext, which is destroyed when
// this function returns. Using m's sub-matches afterwards looks unsafe -
// confirm how callers use m.
676 bool wsearch (const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
677 std::wstring wtext = utow (text);
678 std::wstring wreg = utow (reg);
679 boost::wregex re (wreg, reg_flags);
680 return regex_search (wtext, m, re, search_flags);
683 ustring uiconv (const ustring& src, const char* tocode, const char* fromcode) {
688 size_t isize, osize, rsize;
691 cd = iconv_open (tocode, fromcode);
692 if (cd == (iconv_t)(-1))
693 throw (ustring ("bad encoding name."));
699 rsize = iconv (cd, &ibuf, &isize, &obuf, &osize);
703 ans.append (buf, obuf - buf);
709 ustring padEmpty (const ustring& name) {
711 return ustring (CharConst ("(null)"));
716 ustring dtoustring (double val) {
718 return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
721 uint32_t hextoul (uiterator b, uiterator e) {
725 for (n = 0; n < 8 && b != e; n ++, b ++) {
726 ans = (ans << 4) + hex (*b);
731 ustring toCRLF (const ustring& str) {
732 uiterator b = str.begin ();
733 uiterator e = str.end ();
737 while (usearch (b, e, m, re_lf)) {
738 ans.append (b, m[0].first).append (uCRLF);
745 void skipSpace (uiterator& b, uiterator e) {
746 while (b < e && *b == ' ') {
751 static ustring::value_type toLower_ustring_value (ustring::value_type v) {
752 if ('A' <= v && v <= 'Z') {
753 return v - 'A' + 'a';
// Lower-case the range [*b, *e) in place: every element is replaced by the
// result of toLower_ustring_value() via transform().
// The iterators are passed by pointer and only dereferenced here; the
// pointed-to iterators themselves are not modified.
760 void toLower (ustring::iterator* b, ustring::iterator* e) {
761 transform (*b, *e, *b, toLower_ustring_value);
765 ustring toLower (uiterator b, uiterator e) {
770 for (; b < e; b ++, i++) {
771 *i = toLower_ustring_value (*b);