1 #include "util_string.h"
2 #include "util_const.h"
3 #include "util_random.h"
4 #include "util_splitter.h"
8 #include <boost/regex.hpp>
9 #include <boost/regex/pattern_except.hpp>
10 #include <boost/algorithm/string.hpp>
// Constructs a converter by opening an iconv descriptor with iconv_open (in, out)
// and keeping it in cd.
// Throws a ustring built from in, ", ", out and ": unknown encoding." when the
// descriptor equals ICONV_ERR.
// NOTE(review): POSIX declares iconv_open (tocode, fromcode), so `in` is handed over
// as the target encoding here -- confirm the parameter naming against callers.
20 UIConv::UIConv (const char* in, const char* out) {
21 cd = iconv_open (in, out);
22 if (cd == ICONV_ERR) {
23 throw (ustring (in).append (CharConst (", ")).append (ustring (out)).append (CharConst (": unknown encoding.")));
// Converts text through the iconv descriptor cd.
// Conversion only runs when cd != ICONV_ERR; output is staged in a 4096-byte heap
// buffer and appended to ans as (buf, obuf - buf).
// NOTE(review): buf comes from new[]; confirm a matching delete[] runs on every
// exit path, including when ans.append throws.
27 ustring UIConv::cv (const ustring& text) {
30 if (cd != ICONV_ERR) {
31 char* buf = new char[4096];
34 size_t isize, osize, rsize;
36 ibuf = text.begin ().base ();
// The two ::iconv calls differ only in the cast applied to &ibuf; presumably a
// preprocessor conditional selects one to match the platform prototype -- confirm.
42 rsize = ::iconv (cd, (char**)&ibuf, &isize, &obuf, &osize);
44 rsize = ::iconv (cd, &ibuf, &isize, &obuf, &osize);
// EILSEQ (invalid input sequence): a "_" placeholder is appended to the result.
47 if (errno == EILSEQ) {
50 ans.append (CharConst ("_"));
// EINVAL (incomplete input sequence) and E2BIG (output buffer full) are told apart here.
51 } else if (errno == EINVAL) {
52 } else if (errno == E2BIG) {
58 ans.append (buf, obuf - buf);
// Inserts "," group separators into the leading digit run of str.
// A leading '-' or '+' is recognised and copied as the first output character; the
// digits matched by "^[0-9]+" are then walked with a "," appended at group
// boundaries; the final loop walks whatever follows the digits (presumably copying
// it unchanged -- confirm).
65 ustring c3 (const ustring& str) {
67 static uregex re ("^[0-9]+");
73 if (str[0] == '-' || str[0] == '+') {
77 if (usearch (b, e, m, re)) {
// n: length of the digit run. l: input size plus one slot per three digits.
78 int n = m[0].second - m[0].first;
79 int l = str.size () + n / 3;
84 ans.append (1, str[0]);
86 for (; b != m[0].second; b ++) {
// A separator is due when n > 1 and n % 3 == 1 (n is presumably counted down per
// digit -- confirm).
88 if (n > 1 && n % 3 == 1) {
89 ans.append (CharConst (","));
93 for (; b != e; b ++) {
// Formats val with "%.*g" using DBL_DIG significant digits and returns the text.
// snprintf writes into b with a size limit of 32, and its return value is used as
// the string length (b is presumably a 32-byte local buffer -- confirm).
102 ustring to_ustring (double val) {
104 return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
// Returns the numeric value of one hexadecimal digit character.
// 'a'..'f' and 'A'..'F' both map to 10..15; '0'..'9' has its own branch.
// NOTE(review): the result for a non-hex character is not evident here -- confirm.
107 static int hex (char c) {
108 if ('0' <= c && c <= '9') {
110 } else if ('a' <= c && c <= 'f') {
111 return (c - 'a' + 10);
112 } else if ('A' <= c && c <= 'F') {
113 return (c - 'A' + 10);
// Combines two hexadecimal digit characters into one value: c1 is the high nibble,
// c2 the low nibble.
119 static int hex (char c1, char c2) {
120 return (hex (c1) * 16 + hex (c2));
// Maps a nibble value to a hexadecimal digit character, with separate branches for
// 0..9 and 10..15.
// NOTE(review): presumably the lower-case variant, given the hexchar_c sibling --
// confirm, along with the result for values outside 0..15.
123 static char hexchar (int c) {
124 if (0 <= c && c <= 9)
126 else if (10 <= c && c <= 15)
// Maps a nibble value to a hexadecimal digit character, with separate branches for
// 0..9 and 10..15.
// NOTE(review): presumably the upper-case ("capital") variant of hexchar -- confirm,
// along with the result for values outside 0..15.
132 static char hexchar_c (int c) {
133 if (0 <= c && c <= 9)
135 else if (10 <= c && c <= 15)
// Builds the three-character escape "%XY" for c: the string starts as "%%%", then
// positions 1 and 2 are overwritten with hexchar of the high and low nibble.
// Only the low 8 bits of c contribute because each nibble is masked with 0x0f.
141 static ustring percentHex (int c) {
142 ustring ans (3, '%');
144 ans[1] = hexchar ((c >> 4) & 0x0f);
145 ans[2] = hexchar (c & 0x0f);
// Builds the three-character escape "%XY" for c using hexchar_c for both nibbles.
// The string starts as "%%%" and positions 1 and 2 are overwritten; only the low
// 8 bits of c contribute because each nibble is masked with 0x0f.
149 ustring percentHEX (int c) {
150 ustring ans (3, '%');
152 ans[1] = hexchar_c ((c >> 4) & 0x0f);
153 ans[2] = hexchar_c (c & 0x0f);
// Decodes URL-encoded text.
// The regex recognises three things: '+' (group 1), "%XX" with two hex digits
// (group 2), and a raw NUL byte. Text between matches is copied unchanged.
// NOTE(review): presumably '+' becomes a space and NUL bytes (raw or decoded) are
// suppressed, as the name suggests -- confirm.
157 ustring urldecode_nonul (const ustring& str) {
159 static uregex re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
// The result can never be longer than the input.
163 ans.reserve (str.size ());
166 while (usearch (b, e, m, re)) {
167 if (b != m[0].first) {
168 ans.append (b, m[0].first);
172 } else if (m[2].matched) {
// v is the byte value encoded by the two hex digits.
173 int v = hex (*(m[2].first), *(m[2].first + 1));
// Builds a string from the pieces that a Splitter over (text, re) exposes as
// [sp.begin (), sp.end ()), skipping empty pieces.
// NOTE(review): presumably those pieces are the stretches between matches of re,
// so the result is text with every match removed -- confirm against Splitter.
187 ustring omitPattern (const ustring& text, uregex& re) {
188 Splitter sp (text, re);
193 ans.reserve (text.length ());
194 if (sp.begin () != sp.end ())
195 ans.append (sp.begin (), sp.end ());
// Same append again (presumably for the trailing piece after the last match).
197 if (sp.begin () != sp.end ())
198 ans.append (sp.begin (), sp.end ());
// Applies omitPattern to runs of control bytes: 0x00-0x1f and 0x7f.
209 ustring omitCtrl (const ustring& str) {
210 static uregex re ("[\\x00-\\x1f\\x7f]+");
211 return omitPattern (str, re);
// Applies omitPattern to runs of bytes that are none of: TAB (0x09), LF (0x0a),
// 0x20-0x7e, 0x80-0xff. Unlike omitCtrl, TAB and LF are not matched.
214 ustring omitCtrlX (const ustring& str) {
215 static uregex re ("[^\\x09\\x0a\\x20-\\x7e\\x80-\\xff]+");
216 return omitPattern (str, re);
// Applies omitPattern to runs of NUL (0x00) bytes.
219 ustring omitNul (const ustring& str) {
220 static uregex re ("[\\x00]+");
221 return omitPattern (str, re);
// Applies omitPattern with the shared regex re_nl, which is defined outside this
// function (presumably a newline pattern -- confirm at its definition).
224 ustring omitNL (const ustring& str) {
225 return omitPattern (str, re_nl);
// Applies omitPattern to runs of bytes outside the range ' ' (0x20) through 0x7e.
228 ustring omitNonAscii (const ustring& str) {
229 static uregex re ("[^ -\\x7e]+");
230 return omitPattern (str, re);
// Applies omitPattern to runs of bytes outside 0x21-0x7e; unlike omitNonAscii the
// space character is matched as well.
233 ustring omitNonAsciiWord (const ustring& str) {
234 static uregex re ("[^\\x21-\\x7e]+");
235 return omitPattern (str, re);
// Shared worker for the percent-encoders: scans [b, e) for matches of re and
// rewrites each match; text before a match is copied unchanged.
// Contract on re: a match in capture group 2 is replaced by percentHEX of its
// first byte; the other visible replacement appends uUScore (presumably taken
// when capture group 1 matched -- confirm).
238 static ustring percentEncode (uiterator b, uiterator e, const uregex& re) {
244 while (b < e && usearch (b, e, m, re)) {
246 ans.append (b, m[0].first);
248 ans.append (uUScore);
249 } else if (m[2].matched) {
// percentHEX masks each nibble with 0x0f, so a negative (signed) char still
// produces the escape for its byte value.
250 ans.append (percentHEX (*m[2].first));
// Percent-encodes [b, e) with the regex-driven worker.
// Group 1 matches a NUL byte; group 2 matches any byte other than the characters
// A-Z a-z 0-9 '_' '.' '~' '-'.
262 ustring percentEncode (uiterator b, uiterator e) {
263 static uregex re ("(\\x00)|([^A-Za-z0-9_.~-])");
265 return percentEncode (b, e, re);
// Percent-encodes [b, e) like percentEncode, except that '/' is also left
// unmatched by group 2, so path separators are not escaped.
268 ustring percentEncode_path (uiterator b, uiterator e) {
269 static uregex re ("(\\x00)|([^A-Za-z0-9_/.~-])");
271 return percentEncode (b, e, re);
// Convenience overload: percent-encodes the whole of str.
274 ustring percentEncode (const ustring& str) {
275 return percentEncode (str.begin (), str.end ());
// Convenience overload: applies percentEncode_path to the whole of str.
278 ustring percentEncode_path (const ustring& str) {
279 return percentEncode_path (str.begin (), str.end ());
// Second definition of percentEncode_path (b, e): walks the range with i and
// percent-encodes sub-ranges [b, i) and finally [b, e) separately.
// NOTE(review): this has the same signature as the regex-based definition earlier
// in the file; presumably a preprocessor conditional disables one of them, and
// presumably the split points are '/' characters -- confirm both.
283 ustring percentEncode_path (uiterator b, uiterator e) {
287 for (i = b; i < e; i ++) {
290 ans.append (percentEncode (b, i));
296 ans.append (percentEncode (b, e));
// Convenience overload: applies percentEncode_path to the whole of str.
// NOTE(review): repeats the signature of an earlier definition in this file;
// presumably only one of them is compiled -- confirm.
301 ustring percentEncode_path (const ustring& str) {
302 return percentEncode_path (str.begin (), str.end ());
// Decodes "%XX" escapes in str and returns the result after passing it through
// fixUTF8.
// The regex matches either "%XX" (two hex digits, group 1) or a raw NUL byte; text
// between matches is copied unchanged. Unlike urldecode_nonul, '+' is not matched.
// NOTE(review): the handling of raw and decoded NUL bytes is not evident here --
// confirm.
306 ustring percentDecode (const ustring& str) {
308 static uregex re ("%([0-9a-fA-F][0-9a-fA-F])|\\x00");
314 while (usearch (b, e, m, re)) {
315 if (b != m[0].first) {
316 ans.append (b, m[0].first);
// v is the byte value encoded by the two hex digits.
319 int v = hex (*(m[1].first), *(m[1].first + 1));
330 return fixUTF8 (ans);
// Encodes text for use in a cookie value via the regex-driven percentEncode worker.
// Group 1 matches control bytes (0x00-0x1f, 0x7f); group 2 matches space, ',',
// ';', '%' and bytes 0x80-0xff, which the worker replaces with percentHEX escapes.
333 ustring cookieencode (const ustring& text) {
334 static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
336 return percentEncode (text.begin (), text.end (), re);
// Decodes "%XX" escapes in a cookie value.
// Each hex digit is captured in its own group; text before a match and the tail
// after the last match are copied unchanged.
// NOTE(review): what is appended for the decoded value a is not evident here
// (presumably the byte itself, possibly filtered) -- confirm.
339 ustring cookiedecode (const ustring& text) {
344 static uregex re ("%([0-9a-fA-F])([0-9a-fA-F])");
348 while (usearch (b, e, m, re)) {
350 ans.append (ustring (b, m[0].first));
351 a = hex (*m[1].first, *m[2].first);
356 ans.append (ustring (b, e));
// Produces a ustring from text by visiting every index of ans.
// NOTE(review): the per-character action is not evident here; presumably ':'
// characters are replaced or dropped, as the name suggests -- confirm.
361 ustring clipColon (const ustring& text) {
365 for (i = 0; i < ans.size (); i ++) {
// Returns the directory portion of path: everything before the last '/'.
// NOTE(review): the value returned when path contains no '/' is not evident here --
// confirm.
372 ustring dirPart (const ustring& path) {
// Search backwards from the end for the last separator.
373 ustring::size_type s = path.rfind ('/', path.size ());
375 if (s == ustring::npos) {
// Prefix up to, but not including, the last '/'.
379 return ustring (path.begin (), path.begin () + s);
// Returns the final path component of path, treating both '\' and '/' as
// separators: the regex matches the trailing run of characters that contains
// neither.
// NOTE(review): the result when nothing matches (empty path or trailing separator)
// is not evident here -- confirm.
383 ustring filePart_osSafe (const ustring& path) {
385 static uregex re ("[^\\\\/]+$");
387 if (usearch (path, m, re)) {
388 return ustring (m[0].first, m[0].second);
// Splits [b, e) with a Splitter driven by re and appends pieces to ans via
// sp.cur ().
// ans is appended to; nothing here clears it first.
// NOTE(review): the iteration condition is not evident here -- confirm how a
// trailing empty piece is treated (compare splitE).
394 void split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
395 Splitter sp (b, e, re);
398 ans.push_back (sp.cur ());
// Splits [b, e) with a Splitter driven by re and appends pieces to ans.
// One piece (sp.cur ()) is pushed per separator that nextSep () finds, then the
// remainder [sp.begin (), sp.eol ()) is always pushed, even when empty.
402 void splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
403 Splitter sp (b, e, re);
406 while (sp.nextSep ()) {
407 ans.push_back (sp.cur ());
409 ans.push_back (ustring (sp.begin (), sp.eol ()));
// Scans [b, e) one element at a time.
// NOTE(review): presumably looks for the first occurrence of ch, stores its
// position in m1 and returns whether it was found -- confirm.
413 bool splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
414 for (; b < e; b ++) {
// Escapes text for literal use inside a regular expression.
// The regex is a negated class: it matches every byte outside a safe set made of
// 0x01-0x20, '!', '"', '#', '%', ''', ',', '/', digits, ':', ';', '<', '=', '>',
// '@', letters, '_', '`', '~', '-', and 0x7f-0xff. Each matched byte c is replaced
// by a small buffer whose positions 2 and 3 hold the hex digits of c.
// NOTE(review): presumably the buffer is "\xHH" -- confirm.
424 ustring escape_re (const ustring& text) {
425 ustring::const_iterator b, e;
430 static uregex re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
// Headroom for a handful of escapes before the string has to grow.
434 ans.reserve (text.size () + 16);
437 while (b != e && usearch (b, e, m, re)) {
439 ans.append (b, m[0].first);
441 buf[2] = hexchar ((c >> 4) & 0x0f);
442 buf[3] = hexchar (c & 0x0f);
// Backslash-escapes text.
// Regex groups: 1 = control byte (0x00-0x1f, 0x7f), 2 = backslash, 3 = double quote.
// Text between matches is copied unchanged.
451 ustring slashEncode (const ustring& text) {
452 ustring::const_iterator b, e;
457 static uregex re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
463 while (b != e && usearch (b, e, m, re)) {
465 ans.append (b, m[0].first);
// Control bytes: three get the short forms \t, \r and \n (presumably TAB, CR and
// LF respectively -- confirm).
470 ans.append (CharConst ("\\t"));
473 ans.append (CharConst ("\\r"));
476 ans.append (CharConst ("\\n"));
// Other control bytes: a buffer whose positions 2 and 3 hold the hex digits of c
// (presumably "\xHH" -- confirm).
479 buf[2] = hexchar ((c >> 4) & 0x0f);
480 buf[3] = hexchar (c & 0x0f);
// Backslash becomes two backslashes; a double quote becomes backslash + quote.
483 } else if (m[2].matched) {
484 ans.append (CharConst ("\\\\"));
485 } else if (m[3].matched) {
486 ans.append (CharConst ("\\\""));
// Decodes backslash escapes in text.
// The regex matches a backslash followed by either three octal digits or any
// single byte in 0x00-0x7f. Text between matches is copied unchanged.
497 ustring slashDecode (const ustring& text) {
498 ustring::const_iterator b, e;
502 static uregex re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
506 while (b != e && usearch (b, e, m, re)) {
508 ans.append (b, m[0].first);
// Three escapes produce TAB, CR and LF (presumably \t, \r and \n -- confirm).
513 ans.append (CharConst ("\t"));
516 ans.append (CharConst ("\r"));
519 ans.append (CharConst ("\n"));
// A match of length 4 is the backslash plus three octal digits.
522 if (m[0].second - m[0].first == 4) {
// Decoded values in 1..0x1f (control bytes) get separate treatment.
// NOTE(review): the action taken is not evident here -- confirm.
528 if (0 < c && c < 0x20)
// Parses str as a base-10 unsigned long with the C library strtoul.
// No end pointer is requested, so trailing non-numeric text is not reported.
541 unsigned long strtoul (const ustring& str) {
542 return strtoul (str.c_str (), NULL, 10);
// Parses a base-10 unsigned long starting at the character b points to.
// NOTE(review): no end iterator is supplied, so parsing stops only at the first
// non-digit; this assumes the underlying buffer is NUL-terminated and that b is
// dereferenceable -- confirm at call sites.
545 unsigned long strtoul (const uiterator& b) {
546 return strtoul (&*b, NULL, 10);
// Parses str as a base-10 long with the C library strtol.
// No end pointer is requested, so trailing non-numeric text is not reported.
549 long strtol (const ustring& str) {
550 return strtol (str.c_str (), NULL, 10);
// Parses str as a double with the C library strtod.
// No end pointer is requested, so trailing non-numeric text is not reported.
553 double strtod (const ustring& str) {
554 return strtod (str.c_str (), NULL);
557 bool passMatch (const ustring& pass, const ustring& cpass) {
558 if (pass.length () == 0 || cpass.length () == 0)
560 return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
563 ustring passCrypt (const ustring& pass) {
564 ustring salt = makeSalt ();
565 return ustring (crypt (pass.c_str (), salt.c_str ()));
// Returns a length computed from src.
// NOTE(review): presumably counts characters rather than bytes (otherwise
// src.length () would suffice) -- confirm.
568 size_t strLength (const ustring& src) {
// Extracts part of src into ans: the first loop counts idx steps from the start,
// the second counts len steps from there, both stopping early at the end of src.
// NOTE(review): presumably each step is one character rather than one byte, and
// flen selects some extra behaviour -- confirm both.
580 void substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
586 for (i = 0; i < idx && b < e; i ++)
590 for (i = 0; i < len && t < e; i ++)
// Lookup table indexed by ASCII code (0..127); jsEncode consults it for low-byte
// values. Entries set to 1: space, '-', '.', '/', '0'-'9', ':', ';', '=',
// 'A'-'Z', '_', 'a'-'z'. Everything else, including all control codes, is 0.
598 static bool jssafe[] = {
599 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0--15
600 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 16--31
601 1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1, // 32--47
602 1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0, // 48--63
603 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 64--79
604 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 80--95
605 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 96--111
606 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 112--127
// Encodes str for embedding in JavaScript source.
// Works on an intermediate buffer u that is consumed two bytes at a time as a
// (c, d) pair. A pair with c == 0 and d flagged in jssafe takes the first branch;
// any other pair is written as four hex digits into b[2..5].
// NOTE(review): presumably u is str converted to big-endian UTF-16, safe pairs are
// emitted as the plain character d, and b is a "\uXXXX" template -- confirm.
609 ustring jsEncode (const ustring& str) {
// Reserve three output bytes per input byte of u.
616 ans.reserve (u.size () * 3);
619 for (i = 0; i < u.size (); i += 2) {
// d < 127 also keeps the index inside the 128-entry jssafe table.
622 if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
625 b[2] = hexchar ((c >> 4) & 0x0f);
626 b[3] = hexchar (c & 0x0f);
627 b[4] = hexchar ((d >> 4) & 0x0f);
628 b[5] = hexchar (d & 0x0f);
// Encodes text into a string restricted to filename-safe characters.
// Regex groups: 1 = control byte (0x00-0x1f, 0x7f); 2 = any byte other than
// a-z A-Z 0-9 '.' '_' '-'; 3 = a run of dots at the very start of the text.
// Throws ustring (text).append (uErrorBadName) when text is empty.
// NOTE(review): the treatment of group 1 matches is not evident here -- confirm.
635 ustring filenameEncode (const ustring& text) {
636 static uregex re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
637 Splitter sp (text, re);
641 if (text.length () == 0) {
642 throw (ustring (text).append (uErrorBadName));
644 ans.reserve (text.length () + 16);
// Copy the unmatched piece before the current match, if any.
646 if (sp.begin () < sp.end ())
647 ans.append (sp.begin (), sp.end ());
// Group 2: emit the two hex digits of the byte (presumably after a ':' prefix,
// matching the ":HH" form filenameDecode reads -- confirm).
649 } else if (sp.match (2)) {
650 c = *sp.matchBegin (2);
652 ans.append (1, hexchar ((c >> 4) & 0x0f));
653 ans.append (1, hexchar (c & 0x0f));
// Group 3: each leading dot becomes ":2e".
654 } else if (sp.match (3)) {
655 for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
656 ans.append (CharConst (":2e"));
// Results longer than 250 bytes get special handling.
// NOTE(review): whether that is truncation or an error is not evident -- confirm.
660 if (ans.length () > 250)
// Decodes ":HH" escapes (colon plus two hex digits) in text.
// Unmatched pieces are copied unchanged; each escape is converted to its byte
// value c, and values in 32..255 are told apart from the rest.
// NOTE(review): what happens to a decoded value below 32 is not evident here
// (presumably it is dropped) -- confirm.
665 ustring filenameDecode (const ustring& text) {
666 static uregex re (":([0-9a-fA-F][0-9a-fA-F])");
667 Splitter sp (text, re);
// The decoded text is never longer than the input.
671 ans.reserve (text.length ());
673 if (sp.begin () < sp.end ())
674 ans.append (sp.begin (), sp.end ());
676 c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
677 if (32 <= c && c < 256)
// Tests whether [b, e) starts with the s bytes at t.
// b is taken by reference; presumably it is advanced past the prefix on success,
// as the name suggests -- confirm.
// NOTE(review): `e - b >= s` compares a signed difference with size_t, and &b[0]
// is formed even when b == e -- confirm callers never pass e < b.
684 bool matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
685 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
// Tests whether [b, e) starts with the s bytes at t.
// NOTE(review): the condition is identical to matchSkip's; presumably this
// variant leaves b untouched even though it is taken by reference -- confirm.
693 bool matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
694 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
// Tests whether str starts with the s bytes at t: str must be at least s bytes
// long and its first s bytes must compare equal under memcmp.
701 bool matchHead (const ustring& str, const char* t, size_t s) {
702 if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
// Tests whether str starts with head: str must be at least as long as head and
// its first head.length () bytes must compare equal under memcmp.
709 bool matchHead (const ustring& str, const ustring& head) {
710 if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
// Tests whether [b, e) is exactly the s bytes at t (same length, same content).
717 bool match (uiterator b, uiterator e, const char* t, size_t s) {
718 if (e - b == s && memcmp (t, &b[0], s) == 0) {
// Tests whether str is exactly the s bytes at t (same length, same content).
725 bool match (const ustring& str, const char* t, size_t s) {
726 if (str.length () == s && memcmp (t, str.data (), s) == 0) {
// Tests whether [b, e) has exactly the same length and bytes as str.
733 bool match (uiterator b, uiterator e, const ustring& str) {
734 if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
// Tests whether str equals either of two byte strings: (t, s) or (t2, s2).
741 bool match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
742 if (match (str, t, s) || match (str, t2, s2)) {
// Returns [b, e) with blank characters trimmed; the visible check tests the last
// character of the range with isblank, so trailing blanks are removed (leading
// blanks presumably as well -- confirm).
// NOTE(review): isblank receives a plain char; for bytes >= 0x80 on platforms
// where char is signed the argument is negative, which the C library leaves
// undefined -- consider casting to unsigned char.
749 ustring clipWhite (uiterator b, uiterator e) {
757 if (isblank (*(e - 1))) {
762 return ustring (b, e);
// Convenience overload: applies clipWhite to the whole of str.
764 ustring clipWhite (const ustring& str) {
765 return clipWhite (str.begin (), str.end ());
// Looks up the environment variable key with getenv and returns it as a ustring.
// NOTE(review): getenv returns NULL for an unset variable; presumably that case
// yields an empty string -- confirm.
768 ustring getenvString (const char* key) {
769 char* e = getenv (key);
// Pads src to a width of n, with the width capped at 32.
// m is the number of pad characters still needed; it goes non-positive when src
// is already at least n long.
// NOTE(review): presumably '0' characters are prepended when m > 0 and src is
// returned unchanged otherwise -- confirm.
777 ustring zeroPad (int n, const ustring& src) {
780 n = std::min (32, n);
781 m = n - src.length ();
// Returns the placeholder "(null)" in one branch.
// NOTE(review): presumably that branch is taken when name is empty and name is
// returned unchanged otherwise -- confirm.
793 ustring padEmpty (const ustring& name) {
795 return ustring (CharConst ("(null)"));
// Parses up to eight hexadecimal digits from [b, e) into a 32-bit value.
// Each digit shifts the accumulator left by four bits and adds hex (*b); reading
// stops after eight digits or at e, whichever comes first.
800 uint32_t hextoul (uiterator b, uiterator e) {
804 for (n = 0; n < 8 && b != e; n ++, b ++) {
805 ans = (ans << 4) + hex (*b);
// Rewrites line endings in str: for every match of the shared regex re_lf, the
// text before the match is copied and uCRLF is appended in place of the match.
// NOTE(review): re_lf and uCRLF are defined elsewhere; presumably re_lf matches a
// line break and uCRLF is "\r\n" -- confirm.
810 ustring toCRLF (const ustring& str) {
811 uiterator b = str.begin ();
812 uiterator e = str.end ();
816 while (usearch (b, e, m, re_lf)) {
817 ans.append (b, m[0].first).append (uCRLF);
// Loops while b is inside [b, e) and points at ch; b is taken by reference.
// NOTE(review): presumably the loop body advances b, leaving it on the first
// element that differs from ch (or at e) -- confirm.
824 void skipChar (uiterator& b, uiterator e, int ch) {
825 while (b < e && *b == ch)
// Repositions b (taken by reference) within [b, e) relative to ch.
// NOTE(review): presumably leaves b just past the first occurrence of ch, or at e
// when ch is absent -- confirm.
829 void skipNextToChar (uiterator& b, uiterator e, int ch) {
// Lower-cases one ASCII letter: 'A'..'Z' is shifted to 'a'..'z'.
// No locale is consulted. NOTE(review): other values are presumably returned
// unchanged -- confirm.
836 static ustring::value_type toLower_ustring_value (ustring::value_type v) {
837 if ('A' <= v && v <= 'Z') {
838 return v - 'A' + 'a';
// Returns an ASCII lower-cased copy of [b, e): each input element is passed
// through toLower_ustring_value and written through the output iterator i.
// NOTE(review): the ustring overload of toLower uses boost::to_lower_copy instead,
// so the two may differ for non-ASCII bytes depending on locale -- confirm intent.
844 ustring toLower (uiterator b, uiterator e) {
849 for (; b < e; b ++, i++) {
850 *i = toLower_ustring_value (*b);
// Fits src into a column of n characters, with n capped at 32.
// m is the shortfall of src against n; one branch returns only the last n
// characters of src.
// NOTE(review): presumably that branch is taken when src is longer than n, and
// src is left-padded with '0' when it is shorter -- confirm, including n <= 0.
855 static ustring colpad0 (int n, const ustring& src) {
859 n = std::min (32, n);
860 m = n - src.length ();
870 return ustring (src.end () - n, src.end ());
879 ${M:2}, ${M}, ${M:name}, ${M:ab}
887 //ustring formatDateString (const ustring& format, time_t tm) {
// Expands date/time placeholders in format using the broken-down time v.
// Placeholder syntax accepted by the regex:
//   ${X} or ${X:d}  with X one of Y M D h m s W w o and d a single digit
//   ${M:name}, ${M:ab}, ${M:abname}
// Text between placeholders is copied unchanged.
// Regex groups: 2 = field letter, 4 = width digit, 6 = "name", 7 = "ab",
// 8 = "abname".
888 ustring formatDateString (const ustring& format, struct tm& v) {
894 // static uregex re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
895 static uregex re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}");
896 std::vector<ustring> fpar;
898 // localtime_r (&tm, &v);
901 while (usearch (b, e, m, re)) {
902 ans.append (b, m[0].first);
// Month by name: MStr for ${M:name}, MStr_a for ${M:ab} and ${M:abname}, both
// indexed by tm_mon.
905 if (m[6].matched) { // name
906 ans.append (MStr[v.tm_mon]);
907 } else if (m[7].matched || m[8].matched) { // abname
908 ans.append (MStr_a[v.tm_mon]);
911 // if (m[2].matched) {
913 // pc = strtol (ustring (m[3].first, m[3].second));
// pc is the requested column width taken from the digit in group 4.
914 pc = strtol (ustring (m[4].first, m[4].second));
918 // switch (*m[1].first) {
// Dispatch on the field letter (presumably Y, M, D, h, m, s, W, w, o in the order
// of the branches below -- confirm). Numeric fields are fitted to pc columns by
// colpad0.
919 switch (*m[2].first) {
921 ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900)));
924 ans.append (colpad0 (pc, to_ustring (v.tm_mon + 1)));
927 ans.append (colpad0 (pc, to_ustring (v.tm_mday)));
930 ans.append (colpad0 (pc, to_ustring (v.tm_hour)));
933 ans.append (colpad0 (pc, to_ustring (v.tm_min)));
936 ans.append (colpad0 (pc, to_ustring (v.tm_sec)));
// Weekday names from WStr and WStr_a, indexed by tm_wday.
939 ans.append (WStr [v.tm_wday]);
942 ans.append (WStr_a [v.tm_wday]);
// UTC offset as a sign followed by four digits built from h * 100 + m; tm_gmtoff
// is divided by 60 first (seconds to minutes). The negative case is handled
// separately so the digits are formatted from a non-negative value.
947 if (v.tm_gmtoff < 0) {
948 h = - v.tm_gmtoff / 60;
951 ans.append (CharConst ("-")).append (colpad0 (4, to_ustring (h * 100 + m)));
953 h = v.tm_gmtoff / 60;
956 ans.append (CharConst ("+")).append (colpad0 (4, to_ustring (h * 100 + m)));
// Returns a lower-cased copy of str produced by boost::to_lower_copy.
968 ustring toLower (const ustring& str) {
969 return boost::to_lower_copy (str);
// Returns an upper-cased copy of str produced by boost::to_upper_copy.
972 ustring toUpper (const ustring& str) {
973 return boost::to_upper_copy (str);
// Encodes data as hexadecimal text: every input byte becomes two hexchar digits,
// high nibble first. Exactly twice the input length is reserved up front.
// Masking with 0x0f keeps the result correct for bytes >= 0x80 even when char is
// signed.
976 ustring hexEncode (const ustring& data) {
980 ans.reserve (data.length () * 2);
983 for (; b < e; b ++) {
984 ans.append (1, hexchar ((*b >> 4) & 0x0f));
985 ans.append (1, hexchar (*b & 0x0f));
// Converts three octal digit characters starting at b into their numeric value,
// accumulating ans = ans * 8 + digit for successive characters.
// The caller must guarantee three readable characters; digits are not validated
// in the visible steps.
990 int octchar (uiterator b) { // 3bytes
994 ans = ans * 8 + *b - '0';
996 ans = ans * 8 + *b - '0';
// Formats c as three octal digit characters, filled from the last position
// backwards: ans[2] and ans[1] each take three bits, ans[0] takes two, so only
// 8 bits of c are represented.
// NOTE(review): c is presumably shifted right by three bits between the
// assignments -- confirm.
1000 ustring octchar (int c) {
1002 ans[2] = (c & 0x7) + '0';
1004 ans[1] = (c & 0x7) + '0';
1006 ans[0] = (c & 0x3) + '0';