1 #include "util_string.h"
2 #include "util_const.h"
3 #include "util_random.h"
10 #include <boost/regex.hpp>
11 #include <boost/regex/pattern_except.hpp>
// c3: returns a ustring built from str in which "," is appended between
// groups of digits of the decimal digit run matched by "^[0-9]+".
// NOTE(review): several statements of this function are not visible in this
// listing. The comments below describe only the visible lines; n is
// presumably decremented once per emitted digit -- confirm in the full source.
21 ustring c3 (const ustring& str) {
23 static uregex re ("^[0-9]+");
// A leading sign character is treated specially.
29 if (str[0] == '-' || str[0] == '+') {
33 if (usearch (b, e, m, re)) {
// n: length of the matched digit run.
// l: input size plus one extra slot per three digits (presumably the
// capacity reserved for the result).
34 int n = m[0].second - m[0].first;
35 int l = str.size () + n / 3;
// Copies the first character of str (presumably only in the signed case).
40 ans.append (1, str[0]);
// Walk the digit run up to the end of the match.
42 for (; b != m[0].second; b ++) {
// A comma is appended when n > 1 and n % 3 == 1.
44 if (n > 1 && n % 3 == 1) {
45 ans.append (CharConst (","));
// Walk whatever follows the digit run, up to the end of the input.
49 for (; b != e; b ++) {
// to_ustring (double): formats val with snprintf using "%.*g" and DBL_DIG
// significant digits into buffer b (size argument 32), then returns the
// formatted characters as a ustring whose length is snprintf's return value.
58 ustring to_ustring (double val) {
60 return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
// hex (char): converts one hexadecimal digit character to an int.
// 'a'..'f' and 'A'..'F' both map to 10..15. The value returned for
// '0'..'9' and for non-hex characters is on lines not visible in this listing.
63 static int hex (char c) {
64 if ('0' <= c && c <= '9') {
66 } else if ('a' <= c && c <= 'f') {
67 return (c - 'a' + 10);
68 } else if ('A' <= c && c <= 'F') {
69 return (c - 'A' + 10);
// hex (char, char): combines two hexadecimal digit characters into one value;
// c1 supplies the high nibble and c2 the low nibble.
75 static int hex (char c1, char c2) {
76 return (hex (c1) * 16 + hex (c2));
// hexchar: maps an int to a char; callers in this file pass a 4-bit value
// (masked with 0x0f) and use the result as a hexadecimal digit.
// NOTE(review): the returned characters are on lines not visible in this
// listing.
79 static char hexchar (int c) {
// The range test needs two comparisons joined by &&. The previous form
// `10 <= c <= 15` parsed as `(10 <= c) <= 15`, i.e. a bool (0 or 1) compared
// with 15, which is always true and made any later branch unreachable.
82 else if (10 <= c && c <= 15)
// percentHex: builds a short ustring for the value c in which position 1
// holds the hex digit of the high nibble and position 2 the hex digit of the
// low nibble. Position 0 is presumably '%' (set on a line not visible here).
88 static ustring percentHex (int c) {
91 ans[1] = hexchar ((c >> 4) & 0x0f);
92 ans[2] = hexchar (c & 0x0f);
// urldecode_nonul: scans str for three kinds of token -- a literal '+'
// (group 1), a '%' followed by two hex digits (group 2), or a NUL byte --
// and builds the decoded result in ans.
// NOTE(review): what is emitted for '+' and for a NUL byte is on lines not
// visible here; the name suggests NUL bytes are dropped -- confirm.
96 ustring urldecode_nonul (const ustring& str) {
98 static uregex re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
// The output is never longer than the input.
102 ans.reserve (str.size ());
105 while (usearch (b, e, m, re)) {
// Copy the untouched text that precedes this match.
106 if (b != m[0].first) {
107 ans.append (b, m[0].first);
// %XX: convert the two hex digits into a single value.
111 } else if (m[2].matched) {
112 int v = hex (*(m[2].first), *(m[2].first + 1));
// omitPattern: runs a Splitter over text using re and appends the ranges
// [sp.begin (), sp.end ()) it yields to ans, skipping empty ranges.
// NOTE(review): presumably these ranges are the pieces of text between
// matches of re, so that the matched parts are left out -- confirm against
// the Splitter class and the loop lines not visible here.
126 static ustring omitPattern (const ustring& text, uregex& re) {
127 Splitter sp (text, re);
// The result cannot be longer than the input.
132 ans.reserve (text.length ());
133 if (sp.begin () != sp.end ())
134 ans.append (sp.begin (), sp.end ());
136 if (sp.begin () != sp.end ())
137 ans.append (sp.begin (), sp.end ());
// omitCtrl: passes str to omitPattern with a regex matching runs of bytes in
// 0x00-0x1f or equal to 0x7f.
148 ustring omitCtrl (const ustring& str) {
149 static uregex re ("[\\x00-\\x1f\\x7f]+");
150 return omitPattern (str, re);
// omitNL: passes str to omitPattern with re_nl, a regex defined outside this
// listing (presumably matching newline characters).
153 ustring omitNL (const ustring& str) {
154 return omitPattern (str, re_nl);
// omitNonAscii: passes str to omitPattern with a regex matching runs of
// characters outside the range ' ' (0x20) through 0x7e.
157 ustring omitNonAscii (const ustring& str) {
158 static uregex re ("[^ -\\x7e]+");
159 return omitPattern (str, re);
// omitNonAsciiWord: passes str to omitPattern with a regex matching runs of
// characters outside 0x21-0x7e; unlike omitNonAscii, a space also matches.
162 ustring omitNonAsciiWord (const ustring& str) {
163 static uregex re ("[^\\x21-\\x7e]+");
164 return omitPattern (str, re);
// to_bool: converts a string to a bool. The visible test singles out the
// empty string and the one-character string "0"; the values returned are on
// lines not visible here (presumably false for these two cases, true
// otherwise -- confirm).
167 bool to_bool (const ustring& v) {
168 if (v.length () == 0 || (v.length () == 1 && v[0] == '0')) {
// percentEncode: shared worker for the encoders below. It searches text for
// re repeatedly; text between matches is copied unchanged, a group-2 match
// is replaced by percentHex of the matched character, and the other visible
// branch appends uUScore (presumably for a group-1 match -- the condition is
// on a line not visible here). The first match is handled by an `if` and the
// remaining ones by a `while` with the same body.
175 static ustring percentEncode (const ustring& text, uregex& re) {
185 if (b != e && usearch (b, e, m, re)) {
// Copy the unmatched text in front of the match.
186 if (b != m[0].first) {
187 ans.append (ustring (b, m[0].first));
190 ans.append (uUScore);
191 } else if (m[2].matched) {
192 ans.append (percentHex (*m[2].first));
197 while (b != e && usearch (b, e, m, re)) {
198 if (b != m[0].first) {
199 ans.append (ustring (b, m[0].first));
202 ans.append (uUScore);
203 } else if (m[2].matched) {
204 ans.append (percentHex (*m[2].first));
// Copy what is left after the last match.
211 ans.append (ustring (b, e));
// urlencode: runs percentEncode over url. Group 1 of the regex matches a NUL
// byte; group 2 matches any character other than ASCII letters, digits and
// the characters _ . , / -
219 ustring urlencode (const ustring& url) {
220 static uregex re ("(\\x00)|([^a-zA-Z0-9_.,/-])");
222 return percentEncode (url, re);
// cookieencode: runs percentEncode over text. Group 1 of the regex matches
// bytes 0x00-0x1f and 0x7f; group 2 matches space, comma, semicolon, percent
// and bytes 0x80-0xff.
225 ustring cookieencode (const ustring& text) {
226 static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
228 return percentEncode (text, re);
// cookiedecode: finds every '%' followed by two hex digits in text. The text
// before each match is copied to ans and the two digits are converted to a
// value a (presumably appended as one character on a line not visible here).
// The text after the last match is copied at the end.
231 ustring cookiedecode (const ustring& text) {
236 static uregex re ("%([0-9a-fA-F])([0-9a-fA-F])");
240 while (usearch (b, e, m, re)) {
242 ans.append (ustring (b, m[0].first));
// Group 1 is the high hex digit, group 2 the low hex digit.
243 a = hex (*m[1].first, *m[2].first);
248 ans.append (ustring (b, e));
// clipColon: produces a ustring from text by visiting every index of ans.
// NOTE(review): the per-character action is on lines not visible in this
// listing; the name suggests ':' characters are removed or replaced --
// confirm.
253 ustring clipColon (const ustring& text) {
257 for (i = 0; i < ans.size (); i ++) {
// dirPart (char*): locates the last '/' in the C string path with rindex.
// When a slash exists and is not the first character, the characters in
// front of it are returned. The result for the remaining cases (no slash, or
// slash at position 0) is on lines not visible here.
264 ustring dirPart (char* path) {
265 char* e = rindex (path, '/');
267 if (e && e != path) {
268 return ustring (path, e - path);
// dirPart (const ustring&): finds the last '/' in path with rfind. If one is
// found, the prefix in front of it is returned; the result when there is no
// slash (npos) is on a line not visible here.
274 ustring dirPart (const ustring& path) {
275 ustring::size_type s = path.rfind ('/', path.size ());
277 if (s == ustring::npos) {
280 return ustring (path.begin (), path.begin () + s);
// filePart_osSafe: returns the trailing run of characters of path that
// contains neither a backslash nor a forward slash, so both kinds of path
// separator end the directory part. The result when nothing matches is on a
// line not visible here.
284 ustring filePart_osSafe (const ustring& path) {
286 static uregex re ("[^\\\\/]+$");
288 if (usearch (path, m, re)) {
289 return ustring (m[0].first, m[0].second);
// split: runs a Splitter over [b, e) with re as the separator pattern and
// pushes sp.cur () onto ans (presumably once per piece, inside a loop whose
// header is not visible here). Existing elements of ans are not removed by
// any visible line.
295 void split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
296 Splitter sp (b, e, re);
299 ans.push_back (sp.cur ());
// splitChar: scans forward from b towards e.
// NOTE(review): the loop body is not visible in this listing; presumably it
// looks for the character ch, stores its position in m1 and returns whether
// it was found -- confirm.
303 bool splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
304 for (; b < e; b ++) {
// Base64 alphabet used by base64Encode: index 0-25 'A'-'Z', 26-51 'a'-'z',
// 52-61 '0'-'9', 62 '+', 63 '/'.
314 static char Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// base64Encode: encodes the bytes in [b, e) with the Base64Char alphabet.
// The first visible branch turns three bytes c0, c1, c2 into four output
// characters; the size == 2 branch emits three characters and the size == 1
// branch emits two.
// NOTE(review): how c0/c1/c2 are loaded and whether '=' padding is added for
// the short branches is on lines not visible here -- confirm.
315 ustring base64Encode (uiterator b, uiterator e) {
// Three input bytes: top 6 bits of c0 | low 2 of c0 + high 4 of c1 |
// low 4 of c1 + high 2 of c2 | low 6 bits of c2.
326 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
327 ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
328 ans.append (1, Base64Char[((c1 & 0x0f) << 2) | ((c2 >> 6) & 0x03)]);
329 ans.append (1, Base64Char[c2 & 0x3f]);
330 } else if (size == 2) {
// Two input bytes: the third character carries only the low 4 bits of c1.
333 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
334 ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
335 ans.append (1, Base64Char[((c1 & 0x0f) << 2)]);
337 } else if (size == 1) {
// One input byte: the second character carries only the low 2 bits of c0.
339 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
340 ans.append (1, Base64Char[((c0 & 0x03) << 4)]);
// escape_re: copies text to ans, replacing every character that is not in
// the character class listed in the regex below. For each such character the
// two hex digits of its value c are written to buf[2] and buf[3].
// NOTE(review): the initial contents of buf and the line that appends it are
// not visible here; presumably buf holds a backslash-x style escape so the
// result can be embedded in a regular expression -- confirm.
350 ustring escape_re (const ustring& text) {
351 ustring::const_iterator b, e;
356 static uregex re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
// Leaves headroom for a few escapes beyond the input length.
360 ans.reserve (text.size () + 16);
363 while (b != e && usearch (b, e, m, re)) {
// Copy the unmatched text in front of the match.
365 ans.append (b, m[0].first);
367 buf[2] = hexchar ((c >> 4) & 0x0f);
368 buf[3] = hexchar (c & 0x0f);
// slashEncode: copies text to ans with backslash escapes. The regex has
// three groups: (1) a byte in 0x00-0x1f or 0x7f, (2) a backslash, (3) a
// double quote. Visible outputs are the two-character sequences
// backslash-t, backslash-r and backslash-n, a buffer whose positions 2 and 3
// receive the hex digits of the character, an escaped backslash for group 2
// and an escaped double quote for group 3.
// NOTE(review): which control character selects which output is decided on
// lines not visible here.
377 ustring slashEncode (const ustring& text) {
378 ustring::const_iterator b, e;
383 static uregex re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
389 while (b != e && usearch (b, e, m, re)) {
// Copy the unmatched text in front of the match.
391 ans.append (b, m[0].first);
396 ans.append (CharConst ("\\t"));
399 ans.append (CharConst ("\\r"));
402 ans.append (CharConst ("\\n"));
405 buf[2] = hexchar ((c >> 4) & 0x0f);
406 buf[3] = hexchar (c & 0x0f);
409 } else if (m[2].matched) {
410 ans.append (CharConst ("\\\\"));
411 } else if (m[3].matched) {
412 ans.append (CharConst ("\\\""));
// slashDecode: copies text to ans while decoding backslash sequences. The
// regex matches a backslash followed by either three octal digits or one
// character in 0x00-0x7f. Visible outputs are a tab, a carriage return and a
// line feed; a match of total length 4 is the three-octal-digit form.
// NOTE(review): the selection of each output, and what happens when the
// decoded value satisfies 0 < c < 0x20, are on lines not visible here.
423 ustring slashDecode (const ustring& text) {
424 ustring::const_iterator b, e;
428 static uregex re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
432 while (b != e && usearch (b, e, m, re)) {
// Copy the unmatched text in front of the match.
434 ans.append (b, m[0].first);
439 ans.append (CharConst ("\t"));
442 ans.append (CharConst ("\r"));
445 ans.append (CharConst ("\n"));
// Backslash plus three octal digits.
448 if (m[0].second - m[0].first == 4) {
454 if (0 < c && c < 0x20)
// strtoul (const ustring&): parses str as a base-10 unsigned long using the
// C strtoul on str.c_str (). No end pointer is requested, so trailing
// non-digit text is not reported to the caller.
467 unsigned long strtoul (const ustring& str) {
468 return strtoul (str.c_str (), NULL, 10);
// strtoul (const uiterator&): parses a base-10 unsigned long starting at the
// character b refers to.
// NOTE(review): b is dereferenced, so it must not be an end iterator, and
// parsing stops only at the first non-digit found in the underlying buffer
// -- presumably that buffer is a NUL-terminated string; confirm at call
// sites.
471 unsigned long strtoul (const uiterator& b) {
472 return strtoul (&*b, NULL, 10);
// strtol (const ustring&): parses str as a base-10 long using the C strtol
// on str.c_str (); no end pointer is requested.
475 long strtol (const ustring& str) {
476 return strtol (str.c_str (), NULL, 10);
// strtod (const ustring&): parses str as a double using the C strtod on
// str.c_str (); no end pointer is requested.
479 double strtod (const ustring& str) {
480 return strtod (str.c_str (), NULL);
// passMatch: checks a plain-text password against a crypt () hash. pass is
// hashed with cpass used as the salt/setting string and the result is
// compared with cpass; true means they are equal. Empty pass or cpass is
// handled by the first test (presumably returning false on a line not
// visible here).
// NOTE(review): crypt () can return a null pointer on failure, which strcmp
// must not receive -- consider checking the result before comparing.
483 bool passMatch (const ustring& pass, const ustring& cpass) {
484 if (pass.length () == 0 || cpass.length () == 0)
486 return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
// passCrypt: hashes pass with crypt () using a salt obtained from
// makeSalt () (defined outside this listing) and returns the hash string.
// NOTE(review): crypt () can return a null pointer on failure; building a
// ustring from a null pointer is not valid -- consider checking the result.
489 ustring passCrypt (const ustring& pass) {
490 ustring salt = makeSalt ();
491 return ustring (crypt (pass.c_str (), salt.c_str ()));
// strLength: returns a length of src as a size_t.
// NOTE(review): the body is not visible in this listing, so the unit of the
// length (bytes or characters) cannot be stated from here -- confirm.
494 size_t strLength (const ustring& src) {
// substring: extracts part of src into ans. The first loop runs at most idx
// steps while b has not reached e (skipping to the start position); the
// second runs at most len steps while t has not reached e (measuring the
// extracted part).
// NOTE(review): the loop bodies and the use of flen are on lines not visible
// here; presumably each step advances by one character rather than one byte
// -- confirm.
506 void substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
512 for (i = 0; i < idx && b < e; i ++)
516 for (i = 0; i < len && t < e; i ++)
// utf16Encode: walks the byte string u two bytes at a time and, for each
// pair, writes four hex digits into positions 2..5 of the buffer b.
// NOTE(review): how u is derived from str, the first two characters of b and
// the line appending b to ans are not visible here; presumably u is str
// converted to UTF-16 and b holds a backslash-u style escape -- confirm.
524 ustring utf16Encode (const ustring& str) {
// Reserves three output bytes per byte of u.
531 ans.reserve (u.size () * 3);
// One iteration per two-byte unit of u.
534 for (i = 0; i < u.size (); i += 2) {
// Hex digits of one byte c of the unit.
536 b[2] = hexchar ((c >> 4) & 0x0f);
537 b[3] = hexchar (c & 0x0f);
// Hex digits of the other byte (c is presumably reloaded in between).
539 b[4] = hexchar ((c >> 4) & 0x0f);
540 b[5] = hexchar (c & 0x0f);
// filenameEncode: encodes text for use as a file name. The regex has three
// groups: (1) bytes 0x00-0x1f and 0x7f, (2) any character other than ASCII
// letters, digits, '.', '_' and '-', (3) a run of dots at the start of the
// text. Text between matches is copied unchanged; a group-2 character is
// written as two hex digits (presumably after a ':' prefix appended on a
// line not visible here); each leading dot becomes ":2e".
// Throws a ustring (text followed by uErrorBadName) when text is empty.
// NOTE(review): the handling of group 1 and the action taken when the result
// exceeds 250 characters are on lines not visible here.
546 ustring filenameEncode (const ustring& text) {
547 static uregex re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
548 Splitter sp (text, re);
552 if (text.length () == 0) {
553 throw (ustring (text).append (uErrorBadName));
555 ans.reserve (text.length () + 16);
// Copy the non-empty piece in front of the current match.
557 if (sp.begin () < sp.end ())
558 ans.append (sp.begin (), sp.end ());
560 } else if (sp.match (2)) {
561 c = *sp.matchBegin (2);
563 ans.append (1, hexchar ((c >> 4) & 0x0f));
564 ans.append (1, hexchar (c & 0x0f));
565 } else if (sp.match (3)) {
// c is reused as a counter: one ":2e" per matched leading dot.
566 for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
567 ans.append (CharConst (":2e"));
571 if (ans.length () > 250)
// matchSkip: tests whether the range [b, e) starts with the s bytes at t.
// b is taken by reference; presumably it is advanced past the prefix on
// success (on a line not visible here), as the name suggests -- confirm.
576 bool matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
577 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
// matchHead (iterator range): tests whether the range [b, e) starts with the
// s bytes at t. The returned values are on lines not visible here
// (presumably true on a match).
585 bool matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
586 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
// matchHead (string, buffer): tests whether str is at least s bytes long and
// its first s bytes equal the s bytes at t.
593 bool matchHead (const ustring& str, const char* t, size_t s) {
594 if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
// matchHead (string, string): tests whether str is at least as long as head
// and begins with the bytes of head.
601 bool matchHead (const ustring& str, const ustring& head) {
602 if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
// match (iterator range, buffer): tests whether the range [b, e) has exactly
// s bytes and they equal the s bytes at t.
609 bool match (uiterator b, uiterator e, const char* t, size_t s) {
610 if (e - b == s && memcmp (t, &b[0], s) == 0) {
// match (string, buffer): tests whether str has exactly s bytes and they
// equal the s bytes at t.
617 bool match (const ustring& str, const char* t, size_t s) {
618 if (str.length () == s && memcmp (t, str.data (), s) == 0) {
// match (iterator range, string): tests whether the range [b, e) has the
// same length and the same bytes as str.
625 bool match (uiterator b, uiterator e, const ustring& str) {
626 if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
// match (string, two buffers): tests whether str equals either of the two
// byte sequences (t, s) or (t2, s2), using the single-buffer match.
633 bool match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
634 if (match (str, t, s) || match (str, t2, s2)) {
// clipWhite (iterator range): returns the characters of [b, e) after b and e
// have been adjusted; the visible test checks whether the last character is
// blank (presumably to move e backwards; leading blanks are presumably
// handled on lines not visible here).
// NOTE(review): isblank receives a plain character; if the character type is
// a signed char, bytes >= 0x80 are negative and should be converted to
// unsigned char first -- confirm the character type of uiterator.
641 ustring clipWhite (uiterator b, uiterator e) {
649 if (isblank (*(e - 1))) {
654 return ustring (b, e);
// clipWhite (const ustring&): applies the iterator-range clipWhite to the
// whole of str.
656 ustring clipWhite (const ustring& str) {
657 return clipWhite (str.begin (), str.end ());
// getenvString: looks up the environment variable key with getenv and
// returns its value as a ustring. The handling of an unset variable (getenv
// returning a null pointer) is on lines not visible here.
660 ustring getenvString (const char* key) {
661 char* e = getenv (key);
// zeroPad: pads src to a width of n characters; n is capped at 32 and m is
// the difference between that width and the length of src.
// NOTE(review): the padding itself (presumably m leading '0' characters when
// m is positive) is on lines not visible here -- confirm.
669 ustring zeroPad (int n, const ustring& src) {
672 n = std::min (32, n);
673 m = n - src.length ();
// wsearch: converts text and the pattern reg to wide strings with utow,
// compiles the pattern with reg_flags and runs regex_search with
// search_flags. Returns the result of regex_search. A boost::regex_error is
// caught and replaced by throwing uErrorRegexp.
// NOTE(review): m is filled from the local wtext, which is destroyed when
// the function returns, so iterators stored in m do not remain valid for the
// caller; wsearch_env keeps the converted text in the environment instead.
685 bool wsearch (const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
687 std::wstring wtext = utow (text);
688 std::wstring wreg = utow (reg);
689 boost::wregex wre (wreg, reg_flags);
690 return regex_search (wtext, m, wre, search_flags);
691 } catch (boost::regex_error& err) {
692 throw (uErrorRegexp);
// wsearch_env: same search as wsearch, but the wide copy of text is stored
// in mlenv->env->regtext and the search runs on that stored string, so m
// refers to storage owned by the environment rather than to a local.
// Returns the result of regex_search. A boost::regex_error is caught and
// replaced by throwing uErrorRegexp.
696 bool wsearch_env (MlEnv* mlenv, const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
698 mlenv->env->regtext = utow (text);
699 std::wstring wreg = utow (reg);
700 boost::wregex wre (wreg, reg_flags);
701 return regex_search (mlenv->env->regtext, m, wre, search_flags);
702 } catch (boost::regex_error& err) {
703 throw (uErrorRegexp);
// uiconv: converts src from the encoding fromcode to the encoding tocode
// with iconv and returns the converted bytes. Throws a ustring
// ("bad encoding name.") when iconv_open rejects the encoding names.
// NOTE(review): the conversion loop, the handling of iconv errors and the
// matching iconv_close call are on lines not visible here -- confirm the
// descriptor is closed on every path.
707 ustring uiconv (const ustring& src, const char* tocode, const char* fromcode) {
712 size_t isize, osize, rsize;
715 cd = iconv_open (tocode, fromcode);
716 if (cd == (iconv_t)(-1))
717 throw (ustring ("bad encoding name."));
723 rsize = iconv (cd, &ibuf, &isize, &obuf, &osize);
// obuf has been advanced by iconv; append what was written into buf.
727 ans.append (buf, obuf - buf);
// padEmpty: returns the placeholder text "(null)" in one branch (presumably
// when name is empty; the test and the other return are on lines not visible
// here).
733 ustring padEmpty (const ustring& name) {
735 return ustring (CharConst ("(null)"));
// hextoul: reads at most eight hex digit characters from [b, e), most
// significant digit first, accumulating them four bits at a time into ans.
740 uint32_t hextoul (uiterator b, uiterator e) {
744 for (n = 0; n < 8 && b != e; n ++, b ++) {
745 ans = (ans << 4) + hex (*b);
// toCRLF: copies str to ans, writing uCRLF in place of every match of re_lf
// (a regex defined outside this listing, presumably matching a line feed).
// Copying of the text after the last match is on lines not visible here.
750 ustring toCRLF (const ustring& str) {
751 uiterator b = str.begin ();
752 uiterator e = str.end ();
756 while (usearch (b, e, m, re_lf)) {
757 ans.append (b, m[0].first).append (uCRLF);
// skipSpace: loops while b is in front of e and refers to a ' ' character.
// b is taken by reference, so the caller's iterator is presumably advanced
// past the spaces (the increment is on a line not visible here).
764 void skipSpace (uiterator& b, uiterator e) {
765 while (b < e && *b == ' ') {
// toLower_ustring_value: converts one ASCII upper-case letter 'A'..'Z' to
// the matching lower-case letter. The value returned for any other
// character is on a line not visible here (presumably v unchanged).
770 static ustring::value_type toLower_ustring_value (ustring::value_type v) {
771 if ('A' <= v && v <= 'Z') {
772 return v - 'A' + 'a';
// toLower (iterator pointers): lower-cases the characters in [*b, *e) in
// place by applying toLower_ustring_value to each of them.
779 void toLower (ustring::iterator* b, ustring::iterator* e) {
780 transform (*b, *e, *b, toLower_ustring_value);
// toLower (iterator range): writes the lower-cased form of each character of
// [b, e) through the output iterator i and returns the result as a ustring.
// The destination that i walks over is set up on lines not visible here.
784 ustring toLower (uiterator b, uiterator e) {
789 for (; b < e; b ++, i++) {
790 *i = toLower_ustring_value (*b);
// format_hex (with fcap): appends the value v, formatted as hexadecimal, to
// ans. When a parameter is given, par[0] is parsed as the precision p
// (minimum digit count); the upper-case formats use X and the lower-case
// formats use x (presumably selected by fcap on lines not visible here).
// NOTE(review): snprintf returns the length the full output would need; if p
// is not limited on the lines not visible here, that value can exceed the
// buffer size of 32 used in the append -- confirm.
795 static void format_hex (ustring& ans, MNode* a, std::vector<ustring>& par, bool fcap) {
802 if (par.size () > 0) {
803 int p = strtol (par[0]);
809 ans.append (buf, snprintf (buf, 32, "%.*X", p, v));
811 ans.append (buf, snprintf (buf, 32, "%.*x", p, v));
814 ans.append (buf, snprintf (buf, 32, "%X", v));
816 ans.append (buf, snprintf (buf, 32, "%x", v));
// format_hex: formatter entry with the common three-argument signature;
// forwards to the four-argument format_hex with fcap = false.
820 static void format_hex (ustring& ans, MNode* a, std::vector<ustring>& par) {
821 format_hex (ans, a, par, false);
// format_HEX: formatter entry with the common three-argument signature;
// forwards to the four-argument format_hex with fcap = true.
824 static void format_HEX (ustring& ans, MNode* a, std::vector<ustring>& par) {
825 format_hex (ans, a, par, true);
// format_int_sub: appends the integer value v to ans according to par.
//   - no parameters: plain to_ustring (v);
//   - par[0] is "comma" or "c": digits grouped by c3;
//   - otherwise par[0] is parsed as a width p and the remaining parameters
//     are options: "clip", "0", "comma"/"c". Any other option throws
//     par[i] + uErrorBadParam.
// Two snprintf forms are visible: "%.*ld" (minimum digit count p) and
// "%*ld" (minimum field width p). One output branch appends only the last p
// characters of the formatted text; another passes the text through c3.
// NOTE(review): how pad0 and the option flags select between these forms is
// on lines not visible here.
828 static void format_int_sub (ustring& ans, MNode* a, std::vector<ustring>& par, bool pad0 = false) {
836 if (par.size () > 0) {
840 if (match (par[0], CharConst ("comma")) || match (par[0], CharConst ("c"))) {
841 ans.append (c3 (to_ustring (v)));
843 int p = strtol (par[0]);
// Options start at index 1; index 0 was the width.
848 for (int i = 1; i < par.size (); i ++) {
849 if (match (par[i], CharConst ("clip"))) {
851 } else if (match (par[i], CharConst ("0"))) {
853 } else if (match (par[i], CharConst ("comma")) || match (par[i], CharConst ("c"))) {
856 throw (par[i] + uErrorBadParam);
860 s = snprintf (buf, 32, "%.*ld", p, v);
862 s = snprintf (buf, 32, "%*ld", p, v);
// Keep only the rightmost p characters of the s formatted characters.
864 ans.append (buf + s - p, p);
865 else if (! fclip && fc3)
866 ans.append (c3 (ustring (buf, s)));
871 ans.append (to_ustring (v));
// format_int (parameter list): formatter entry; forwards to format_int_sub
// with pad0 left at its default (false).
875 static void format_int (ustring& ans, MNode* a, std::vector<ustring>& par) {
876 format_int_sub (ans, a, par);
// format_int0: formatter entry; forwards to format_int_sub with pad0 = true.
879 static void format_int0 (ustring& ans, MNode* a, std::vector<ustring>& par) {
880 format_int_sub (ans, a, par, true);
// format_int (fixed width): appends the integer value v to ans using a width
// of c. Two snprintf forms are visible: "%.*ld" (minimum digit count c) and
// "%*ld" (minimum field width c); one branch keeps only the last c
// characters of the formatted text, and another falls back to
// to_ustring (v).
// NOTE(review): how pad0 and c select between these branches is on lines not
// visible here.
883 static void format_int (ustring& ans, MNode* a, int c, bool pad0 = false) {
895 s = snprintf (buf, 32, "%.*ld", c, v);
897 s = snprintf (buf, 32, "%*ld", c, v);
// Keep only the rightmost c characters of the s formatted characters.
899 ans.append (buf + s - c, c);
903 ans.append (to_ustring (v));
// format_float: appends to_double (a) to ans, formatted with "%*.*lf", where
// p1 (from par[0]) is the field width and p2 (from par[1]) is the number of
// digits after the decimal point.
// NOTE(review): defaults and limits for p1/p2 are on lines not visible here;
// snprintf returns the length the full output would need, which must not
// exceed the buffer size of 32 used in the append -- confirm.
907 static void format_float (ustring& ans, MNode* a, std::vector<ustring>& par) {
913 p1 = strtol (par[0]);
915 p2 = strtol (par[1]);
924 ans.append (buf, snprintf (buf, 32, "%*.*lf", p1, p2, to_double (a)));
// format_string: appends the string form of a to ans, padded with spaces to
// a width p. A second parameter of "right" or "r" selects one alignment; any
// other second parameter throws par[1] + uErrorBadParam. One branch puts the
// spaces in front of the text and the other puts them after it.
// NOTE(review): the parsing of p and the test that p exceeds u.size ()
// before padding are on lines not visible here -- confirm.
927 static void format_string (ustring& ans, MNode* a, std::vector<ustring>& par) {
930 ustring u = to_string (a);
936 if (par.size () > 1) {
937 if (match (par[1], CharConst ("right")) || match (par[1], CharConst ("r")))
940 throw (par[1] + uErrorBadParam);
// Padding first, then the text.
944 ans.append (p - u.size (), ' ').append (u);
// Text first, then the padding.
949 ans.append (u).append (p - u.size (), ' ');
// format_literal: table lookup shared by the month/week formatters. The
// integer value of a, minus offset, is used as an index into list; when the
// index is within [0, size) the entry is appended to ans. No visible line
// appends anything for an out-of-range index.
955 static void format_literal (ustring& ans, MNode* a, const char* list[], int offset, size_t size) {
959 v = to_int (a) - offset;
960 if (0 <= v && v < size)
961 ans.append (list[v]);
// format_month: appends the three-letter English month abbreviation for the
// month number in a; month numbers start at 1 (offset 1, 12 entries).
// par is not used by the visible lines.
965 static void format_month (ustring& ans, MNode* a, std::vector<ustring>& par) {
966 static const char* mstr_a[] = {
967 "Jan", "Feb", "Mar", "Apr",
968 "May", "Jun", "Jul", "Aug",
969 "Sep", "Oct", "Nov", "Dec"
971 format_literal (ans, a, mstr_a, 1, 12);
// format_Month: appends the full English month name for the month number in
// a; month numbers start at 1 (offset 1, 12 entries). par is not used by the
// visible lines.
974 static void format_Month (ustring& ans, MNode* a, std::vector<ustring>& par) {
975 static const char* mstr[] = {
976 "January", "February", "March", "April",
977 "May", "June", "July", "August",
978 "September", "October", "November", "December"
980 format_literal (ans, a, mstr, 1, 12);
// format_week: appends the three-letter English weekday abbreviation for the
// weekday number in a; numbering starts at 0 with "Sun" (offset 0, 7
// entries). Part of the table is on lines not visible in this listing.
983 static void format_week (ustring& ans, MNode* a, std::vector<ustring>& par) {
984 static const char* wstr_a[] = {
985 "Sun", "Mon", "Tue", "Wed",
988 format_literal (ans, a, wstr_a, 0, 7);
// format_Week: appends the full English weekday name for the weekday number
// in a; numbering starts at 0 with "Sunday" (offset 0, 7 entries).
991 static void format_Week (ustring& ans, MNode* a, std::vector<ustring>& par) {
992 static const char* wstr[] = {
993 "Sunday", "Monday", "Tuesday", "Wednesday",
994 "Thursday", "Friday", "Saturday"
996 format_literal (ans, a, wstr, 0, 7);
// formatString: expands placeholders of the form ${N}, ${N:name} or
// ${N:name:params} in format, where N is a 1-based index into par, name
// selects a formatter from the table below and params is a colon-separated
// parameter list handed to that formatter. Text outside placeholders is
// copied unchanged.
// NOTE(review): the declarations, the struct holding the formatter table and
// the handling of an out-of-range N are on lines not visible here.
999 ustring formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
// Group 1: index N. Group 2: optional ":name..." part. Group 3: formatter
// name. Group 5: parameter text.
1005 static uregex re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
1009 void (*fn)(ustring& ans, MNode* a, std::vector<ustring>& par);
1011 {CharConst ("hex"), format_hex},
1012 {CharConst ("HEX"), format_HEX},
1013 {CharConst ("int"), format_int},
1014 {CharConst ("int0"), format_int0},
1015 {CharConst ("float"), format_float},
1016 {CharConst ("string"), format_string},
1017 {CharConst ("month"), format_month},
1018 {CharConst ("Month"), format_Month},
1019 {CharConst ("week"), format_week},
1020 {CharConst ("Week"), format_Week},
1024 b = format.begin ();
1026 while (usearch (b, e, m, re)) {
// Copy the literal text in front of the placeholder.
1027 ans.append (b, m[0].first);
// Convert the 1-based placeholder number to a 0-based index.
1029 i = strtoul (ustring (m[1].first, m[1].second)) - 1;
1030 if (i < par.size ()) {
// No formatter name: use the plain string form of the argument.
1035 if (! m[2].matched) {
1037 ans.append (to_string (a));
1039 std::vector<ustring> fpar;
// Split the parameter text on re_colon (defined outside this listing).
1042 split (m[5].first, m[5].second, re_colon, fpar);
// Look the formatter up by name; the table is ended by an entry whose name
// is null.
1043 for (i = 0; formatFunc[i].name; i ++) {
1044 if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) {
1045 (*formatFunc[i].fn) (ans, a, fpar);
// Copies the placeholder text itself to the output (presumably when no
// formatter name matched -- the surrounding condition is not visible).
1049 ans.append (m[0].first, m[0].second);
1067 ustring formatDateString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
1073 static uregex re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
1075 b = format.begin ();
1077 while (usearch (b, e, m, re)) {
1078 std::vector<ustring> fpar;
1079 ans.append (b, m[0].first);
1081 switch (*m[1].first) {
1108 if (! m[2].matched) {
1109 switch (*m[1].first) {
1111 format_Week (ans, a, fpar);
1114 format_week (ans, a, fpar);
1118 ans.append (to_string (a));
1121 format_int (ans, a, strtol (ustring (m[3].first, m[3].second)), true);