1 #include "util_string.h"
2 #include "util_const.h"
3 #include "util_random.h"
10 #include <boost/regex.hpp>
11 #include <boost/regex/pattern_except.hpp>
// c3 -- inserts "," separators into the leading run of decimal digits of str.
// Presumably groups the digits in threes (the n / 3 estimate and the n % 3 test
// below point that way) -- TODO confirm against the lines not shown here.
21 ustring c3 (const ustring& str) {
// Anchored pattern: one or more ASCII digits at the start of the searched range.
23 static uregex re ("^[0-9]+");
// A leading sign character is handled separately from the digit run.
// NOTE(review): str[0] is read without a visible emptiness check -- confirm.
29 if (str[0] == '-' || str[0] == '+') {
33 if (usearch (b, e, m, re)) {
// n: number of digits in the matched run.
34 int n = m[0].second - m[0].first;
// l: input length plus about one separator per three digits (looks like a
// capacity estimate -- TODO confirm how l is used).
35 int l = str.size () + n / 3;
// Copies the first character of str to the output.
40 ans.append (1, str[0]);
// Walks the matched digits.
42 for (; b != m[0].second; b ++) {
// Emits a separator when n > 1 and n is 1 modulo 3; presumably n counts the
// digits still to be written and is updated in a line not shown -- TODO confirm.
44 if (n > 1 && n % 3 == 1) {
45 ans.append (CharConst (","));
// Walks whatever follows the digit run, up to e.
49 for (; b != e; b ++) {
// to_ustring -- formats a double with snprintf "%.*g" using DBL_DIG significant
// digits and returns the formatted characters as a ustring.
58 ustring to_ustring (double val) {
// The snprintf return value is used directly as the result length; b is
// presumably a local buffer of at least 32 bytes -- TODO confirm.
60 return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
// hex -- converts one hexadecimal digit character to a number.
// Lower- and upper-case letters both map to 10..15. The value returned for
// '0'..'9' and for non-hex characters is in lines not shown here.
63 static int hex (char c) {
64 if ('0' <= c && c <= '9') {
66 } else if ('a' <= c && c <= 'f') {
67 return (c - 'a' + 10);
68 } else if ('A' <= c && c <= 'F') {
69 return (c - 'A' + 10);
// hex -- combines two hexadecimal digit characters into one value:
// c1 supplies the high nibble, c2 the low nibble.
75 static int hex (char c1, char c2) {
76 return (hex (c1) * 16 + hex (c2));
// hexchar -- returns a character for the integer c; callers in this file pass
// 4-bit values (masked with 0x0f). Only the 10..15 branch test is visible here;
// the returned characters are in lines not shown.
79 static char hexchar (int c) {
// Fixed: `10 <= c <= 15` parsed as `(10 <= c) <= 15`, which is always true
// because the inner comparison yields 0 or 1. Test both bounds explicitly so
// out-of-range values no longer enter this branch.
82 else if (10 <= c && c <= 15)
// percentHex -- builds a short string for byte value c in which positions 1
// and 2 hold hexchar() of the high and low nibble. Position 0 is presumably
// '%' (set in a line not shown) -- TODO confirm.
88 static ustring percentHex (int c) {
91 ans[1] = hexchar ((c >> 4) & 0x0f);
92 ans[2] = hexchar (c & 0x0f);
// urldecode_nonul -- decodes URL-encoded text. Text between pattern matches is
// copied through unchanged; "%XX" escapes are converted with hex().
// What is emitted for '+' and for NUL matches is in lines not shown; the name
// suggests NUL bytes are kept out of the result -- TODO confirm.
96 ustring urldecode_nonul (const ustring& str) {
// Group 1: '+'. Group 2: the two hex digits after '%'. Third alternative: a NUL byte.
98 static uregex re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
// Reserve the input size up front.
102 ans.reserve (str.size ());
105 while (usearch (b, e, m, re)) {
// Copy the unmatched text that precedes this match.
106 if (b != m[0].first) {
107 ans.append (b, m[0].first);
111 } else if (m[2].matched) {
// v: value of the two hex digits captured by group 2.
112 int v = hex (*(m[2].first), *(m[2].first + 1));
// omitPattern -- assembles a result from the pieces a Splitter over (text, re)
// exposes through begin()/end(). Presumably this yields text with every match
// of re removed -- TODO confirm against Splitter's definition.
// NOTE(review): the control flow around the two identical append blocks is not
// visible; presumably one runs per match and the other handles the tail.
126 static ustring omitPattern (const ustring& text, uregex& re) {
127 Splitter sp (text, re);
132 ans.reserve (text.length ());
// Skip empty pieces.
133 if (sp.begin () != sp.end ())
134 ans.append (sp.begin (), sp.end ());
136 if (sp.begin () != sp.end ())
137 ans.append (sp.begin (), sp.end ());
// omitCtrl -- applies omitPattern with a pattern matching runs of bytes
// 0x00-0x1f and 0x7f.
148 ustring omitCtrl (const ustring& str) {
149 static uregex re ("[\\x00-\\x1f\\x7f]+");
150 return omitPattern (str, re);
// omitNul -- applies omitPattern with a pattern matching runs of NUL bytes.
153 ustring omitNul (const ustring& str) {
154 static uregex re ("[\\x00]+");
155 return omitPattern (str, re);
// omitNL -- applies omitPattern with re_nl, a pattern defined outside this
// block (presumably matching newlines -- confirm at its definition).
158 ustring omitNL (const ustring& str) {
159 return omitPattern (str, re_nl);
// omitNonAscii -- applies omitPattern with a pattern matching runs of bytes
// outside the range ' ' (0x20) to 0x7e.
162 ustring omitNonAscii (const ustring& str) {
163 static uregex re ("[^ -\\x7e]+");
164 return omitPattern (str, re);
// omitNonAsciiWord -- applies omitPattern with a pattern matching runs of bytes
// outside 0x21-0x7e; unlike omitNonAscii, the space character is matched too.
167 ustring omitNonAsciiWord (const ustring& str) {
168 static uregex re ("[^\\x21-\\x7e]+");
169 return omitPattern (str, re);
// to_bool -- converts a string to a boolean. The visible test singles out the
// empty string and the exact string "0"; presumably those give false and
// everything else true -- TODO confirm (the return statements are not shown).
172 bool to_bool (const ustring& v) {
173 if (v.length () == 0 || (v.length () == 1 && v[0] == '0')) {
// percentEncode -- rewrites text using a two-group pattern supplied by the caller:
//   group 2 matched -> the matched character is replaced by percentHex() of it;
//   otherwise       -> uUScore is appended (presumably when group 1 matched;
//                      that test is in a line not shown).
// Unmatched text is copied through, including the tail after the last match.
// The first match is handled by an `if`, subsequent ones by an equivalent `while`.
180 static ustring percentEncode (const ustring& text, uregex& re) {
190 if (b != e && usearch (b, e, m, re)) {
// Copy the unmatched text that precedes this match.
191 if (b != m[0].first) {
192 ans.append (ustring (b, m[0].first));
195 ans.append (uUScore);
196 } else if (m[2].matched) {
197 ans.append (percentHex (*m[2].first));
// Same handling for every further match.
202 while (b != e && usearch (b, e, m, re)) {
203 if (b != m[0].first) {
204 ans.append (ustring (b, m[0].first));
207 ans.append (uUScore);
208 } else if (m[2].matched) {
209 ans.append (percentHex (*m[2].first));
// Remaining text after the last match.
216 ans.append (ustring (b, e));
// urlencode -- percentEncode with: group 1 = NUL byte, group 2 = any character
// other than ASCII letters, digits, '_', '.', ',', '/' and '-'.
224 ustring urlencode (const ustring& url) {
225 static uregex re ("(\\x00)|([^a-zA-Z0-9_.,/-])");
227 return percentEncode (url, re);
// cookieencode -- percentEncode with: group 1 = bytes 0x00-0x1f and 0x7f,
// group 2 = space, ',', ';', '%' and bytes 0x80-0xff.
230 ustring cookieencode (const ustring& text) {
231 static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
233 return percentEncode (text, re);
// cookiedecode -- decodes "%XY" escapes: text before each match is copied, the
// two captured hex digits are combined with hex(), and the tail after the last
// match is appended. How the decoded value a is emitted is in lines not shown.
236 ustring cookiedecode (const ustring& text) {
// Groups 1 and 2 capture the high and low hex digit.
241 static uregex re ("%([0-9a-fA-F])([0-9a-fA-F])");
245 while (usearch (b, e, m, re)) {
247 ans.append (ustring (b, m[0].first));
248 a = hex (*m[1].first, *m[2].first);
253 ans.append (ustring (b, e));
// clipColon -- visits every index of ans (presumably a working copy of text).
// The per-character action is in lines not shown; from the name it presumably
// deals with ':' characters -- TODO confirm.
258 ustring clipColon (const ustring& text) {
262 for (i = 0; i < ans.size (); i ++) {
// dirPart -- returns the part of a C-string path before its last '/'.
// The visible branch applies only when a '/' exists and is not the first
// character; the result for the other cases is in lines not shown.
269 ustring dirPart (char* path) {
// rindex: position of the last '/' in path, or NULL if there is none.
270 char* e = rindex (path, '/');
272 if (e && e != path) {
273 return ustring (path, e - path);
// dirPart -- ustring overload: returns the characters before the last '/'.
// The value returned when path contains no '/' is in a line not shown.
279 ustring dirPart (const ustring& path) {
280 ustring::size_type s = path.rfind ('/', path.size ());
282 if (s == ustring::npos) {
285 return ustring (path.begin (), path.begin () + s);
// filePart_osSafe -- returns the trailing run of characters that contains
// neither '\' nor '/', i.e. the last path component under either separator.
// The result when nothing matches is in lines not shown.
289 ustring filePart_osSafe (const ustring& path) {
291 static uregex re ("[^\\\\/]+$");
293 if (usearch (path, m, re)) {
294 return ustring (m[0].first, m[0].second);
// split -- runs a Splitter over [b, e) with re and pushes sp.cur() onto ans.
// The loop that advances the splitter is in lines not shown.
300 void split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
301 Splitter sp (b, e, re);
304 ans.push_back (sp.cur ());
// splitChar -- scans [b, e) one element at a time. Presumably it looks for ch
// and reports its position through m1 -- TODO confirm (the loop body and the
// return statements are not shown).
308 bool splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
309 for (; b < e; b ++) {
// Base64 alphabet: index 0..63 -> output character.
319 static char Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// base64Encode -- encodes the bytes in [b, e) as Base64 text.
// c0, c1, c2 are the (up to three) input bytes of the current group; how they
// are read, and any '=' padding, is in lines not shown -- TODO confirm.
320 ustring base64Encode (uiterator b, uiterator e) {
// Full group: 24 input bits -> four 6-bit indexes.
331 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
332 ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
333 ans.append (1, Base64Char[((c1 & 0x0f) << 2) | ((c2 >> 6) & 0x03)]);
334 ans.append (1, Base64Char[c2 & 0x3f]);
// Two bytes: three characters, the last carrying the low 4 bits of c1.
335 } else if (size == 2) {
338 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
339 ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
340 ans.append (1, Base64Char[((c1 & 0x0f) << 2)]);
// One byte: two characters, the last carrying the low 2 bits of c0.
342 } else if (size == 1) {
344 ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
345 ans.append (1, Base64Char[((c0 & 0x03) << 4)]);
// escape_re -- escapes text for use inside a regular expression. Characters
// matched by the pattern below are replaced by an escape whose 3rd and 4th
// characters are the hex digits of the byte (presumably "\xHH" -- TODO
// confirm); everything else is copied through.
355 ustring escape_re (const ustring& text) {
356 ustring::const_iterator b, e;
// Negated class: matches any character NOT listed, i.e. the ones needing escape.
361 static uregex re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
// Leave some headroom for escapes.
365 ans.reserve (text.size () + 16);
368 while (b != e && usearch (b, e, m, re)) {
// Copy the unmatched text that precedes this match.
370 ans.append (b, m[0].first);
372 buf[2] = hexchar ((c >> 4) & 0x0f);
373 buf[3] = hexchar (c & 0x0f);
// slashEncode -- backslash-escapes text:
//   control characters -> "\t", "\r", "\n" (presumably for tab, CR and LF; the
//                         selecting tests are not shown) or a hex escape
//                         written into buf;
//   backslash          -> "\\";
//   double quote       -> "\"".
// Unmatched text is copied through unchanged.
382 ustring slashEncode (const ustring& text) {
383 ustring::const_iterator b, e;
// Group 1: bytes 0x00-0x1f and 0x7f. Group 2: backslash. Group 3: double quote.
388 static uregex re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
394 while (b != e && usearch (b, e, m, re)) {
396 ans.append (b, m[0].first);
401 ans.append (CharConst ("\\t"));
404 ans.append (CharConst ("\\r"));
407 ans.append (CharConst ("\\n"));
// Other control characters: hex digits of the byte go to buf[2] and buf[3].
410 buf[2] = hexchar ((c >> 4) & 0x0f);
411 buf[3] = hexchar (c & 0x0f);
414 } else if (m[2].matched) {
415 ans.append (CharConst ("\\\\"));
416 } else if (m[3].matched) {
417 ans.append (CharConst ("\\\""));
// slashDecode -- decodes backslash escapes. A match is a backslash followed by
// either three octal digits or one ASCII character. Tab, CR and LF characters
// are emitted for some escapes (presumably \t, \r, \n; the selecting tests are
// not shown).
428 ustring slashDecode (const ustring& text) {
429 ustring::const_iterator b, e;
433 static uregex re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
437 while (b != e && usearch (b, e, m, re)) {
// Copy the unmatched text that precedes this match.
439 ans.append (b, m[0].first);
444 ans.append (CharConst ("\t"));
447 ans.append (CharConst ("\r"));
450 ans.append (CharConst ("\n"));
// A 4-character match is the backslash plus three octal digits.
453 if (m[0].second - m[0].first == 4) {
// Special-cases decoded values 0x01-0x1f; the action taken is not shown.
459 if (0 < c && c < 0x20)
// strtoul -- ustring overload: parses str as a base-10 unsigned long by
// forwarding str.c_str() to the (const char*, char**, int) overload.
472 unsigned long strtoul (const ustring& str) {
473 return strtoul (str.c_str (), NULL, 10);
// strtoul -- iterator overload: parses base-10 digits starting at *b.
// NOTE(review): no end iterator is passed, so parsing stops only at the first
// non-digit; this assumes the underlying storage is terminated -- confirm at
// call sites.
476 unsigned long strtoul (const uiterator& b) {
477 return strtoul (&*b, NULL, 10);
// strtol -- parses str as a base-10 long by forwarding str.c_str() to the
// (const char*, char**, int) overload.
480 long strtol (const ustring& str) {
481 return strtol (str.c_str (), NULL, 10);
// strtod -- parses str as a double by forwarding str.c_str() to the
// (const char*, char**) overload.
484 double strtod (const ustring& str) {
485 return strtod (str.c_str (), NULL);
// passMatch -- checks a plain-text password against a crypt() hash: pass is
// hashed using cpass as the salt/setting string, and the result must equal cpass.
// An empty pass or cpass is special-cased (the returned value is not shown).
// NOTE(review): crypt() can return NULL on failure, which strcmp would
// dereference -- confirm whether that needs a guard.
488 bool passMatch (const ustring& pass, const ustring& cpass) {
489 if (pass.length () == 0 || cpass.length () == 0)
491 return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
// passCrypt -- hashes pass with crypt() using a salt from makeSalt() (defined
// outside this block) and returns the hash string.
// NOTE(review): crypt() can return NULL on failure; constructing a ustring
// from NULL would be invalid -- confirm whether that needs a guard.
494 ustring passCrypt (const ustring& pass) {
495 ustring salt = makeSalt ();
496 return ustring (crypt (pass.c_str (), salt.c_str ()));
// strLength -- returns a length for src. The body is not shown; presumably it
// counts characters rather than bytes -- TODO confirm.
499 size_t strLength (const ustring& src) {
// substring -- extracts part of src into ans. The first loop runs up to idx
// steps from the start, the second up to len steps from there, each bounded by
// the end of the data. What one step advances by, and the meaning of flen, are
// in lines not shown -- TODO confirm.
511 void substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
517 for (i = 0; i < idx && b < e; i ++)
521 for (i = 0; i < len && t < e; i ++)
// jssafe -- lookup table indexed by ASCII code 0..127; 1 marks characters that
// jsEncode emits without escaping. Entries set to 1: space, '-', '.', '/',
// '0'-'9', ':', ';', '=', 'A'-'Z', '_', 'a'-'z'.
529 static bool jssafe[] = {
530 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0--15
531 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 16--31
532 1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1, // 32--47
533 1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0, // 48--63
534 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 64--79
535 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 80--95
536 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 96--111
537 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 112--127
// jsEncode -- escapes str for embedding in JavaScript. The data in u is
// processed two bytes at a time as (c, d); presumably u is a 16-bit encoding of
// str with c the high byte -- TODO confirm how u is produced.
// A unit with c == 0 and d flagged in jssafe is treated as safe; otherwise the
// four hex digits of c and d are written to b[2..5] (presumably a "\uXXXX"
// escape -- TODO confirm).
540 ustring jsEncode (const ustring& str) {
547 ans.reserve (u.size () * 3);
550 for (i = 0; i < u.size (); i += 2) {
553 if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
556 b[2] = hexchar ((c >> 4) & 0x0f);
557 b[3] = hexchar (c & 0x0f);
558 b[4] = hexchar ((d >> 4) & 0x0f);
559 b[5] = hexchar (d & 0x0f);
// filenameEncode -- encodes text into a restricted file-name alphabet.
//   group 1 (bytes 0x00-0x1f, 0x7f): handling is in a line not shown;
//   group 2 (anything outside a-zA-Z0-9._-): the two hex digits of the byte are
//           appended (presumably after a ':' prefix -- TODO confirm);
//   group 3 (leading dots): each dot becomes ":2e".
// Throws text + uErrorBadName when text is empty. A result longer than 250
// characters is special-cased (the action is not shown).
566 ustring filenameEncode (const ustring& text) {
567 static uregex re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
568 Splitter sp (text, re);
572 if (text.length () == 0) {
573 throw (ustring (text).append (uErrorBadName));
575 ans.reserve (text.length () + 16);
// Copy the unmatched piece, if any.
577 if (sp.begin () < sp.end ())
578 ans.append (sp.begin (), sp.end ());
580 } else if (sp.match (2)) {
581 c = *sp.matchBegin (2);
583 ans.append (1, hexchar ((c >> 4) & 0x0f));
584 ans.append (1, hexchar (c & 0x0f));
585 } else if (sp.match (3)) {
// One ":2e" per dot in the leading run (0x2e is '.').
586 for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
587 ans.append (CharConst (":2e"));
591 if (ans.length () > 250)
// filenameDecode -- decodes ":XX" escapes (two hex digits after a colon).
// Unmatched text is copied through. A decoded value is special-cased when it
// lies in 32..255 (the action is in a line not shown; presumably it is
// appended as a byte -- TODO confirm).
596 ustring filenameDecode (const ustring& text) {
597 static uregex re (":([0-9a-fA-F][0-9a-fA-F])");
598 Splitter sp (text, re);
602 ans.reserve (text.length ());
604 if (sp.begin () < sp.end ())
605 ans.append (sp.begin (), sp.end ());
// Combine the two captured hex digits into one value.
607 c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
608 if (32 <= c && c < 256)
// matchSkip -- tests whether [b, e) starts with the s bytes at t. b is taken by
// reference; presumably it is advanced past the prefix on success -- TODO
// confirm (the branch body is not shown).
615 bool matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
616 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
// matchHead -- tests whether [b, e) starts with the s bytes at t.
// The length check comes first so memcmp never reads past e.
624 bool matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
625 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
// matchHead -- tests whether str starts with the s bytes at t.
632 bool matchHead (const ustring& str, const char* t, size_t s) {
633 if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
// matchHead -- tests whether str starts with the contents of head.
640 bool matchHead (const ustring& str, const ustring& head) {
641 if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
// match -- tests whether [b, e) is exactly the s bytes at t (same length and
// same content).
648 bool match (uiterator b, uiterator e, const char* t, size_t s) {
649 if (e - b == s && memcmp (t, &b[0], s) == 0) {
// match -- tests whether str is exactly the s bytes at t.
656 bool match (const ustring& str, const char* t, size_t s) {
657 if (str.length () == s && memcmp (t, str.data (), s) == 0) {
// match -- tests whether [b, e) has the same length and content as str.
664 bool match (uiterator b, uiterator e, const ustring& str) {
665 if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
// match -- tests whether str equals either of two byte strings, (t, s) or (t2, s2).
672 bool match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
673 if (match (str, t, s) || match (str, t2, s2)) {
// clipWhite -- returns [b, e) as a ustring after the range has been narrowed;
// the visible test inspects the last character with isblank(), so trailing
// blanks are evidently trimmed. Leading-blank handling is in lines not shown.
// NOTE(review): isblank() is undefined for negative char values; if ustring
// holds plain char, bytes >= 0x80 may need a cast to unsigned char -- confirm.
680 ustring clipWhite (uiterator b, uiterator e) {
688 if (isblank (*(e - 1))) {
693 return ustring (b, e);
// clipWhite -- ustring overload: forwards the whole string to the iterator overload.
695 ustring clipWhite (const ustring& str) {
696 return clipWhite (str.begin (), str.end ());
// getenvString -- looks up environment variable key with getenv() and returns
// it as a ustring. Handling of an unset variable (getenv returning NULL) is in
// lines not shown.
699 ustring getenvString (const char* key) {
700 char* e = getenv (key);
// zeroPad -- pads src to a width of n, presumably with leading '0' characters
// (the padding itself is not shown -- TODO confirm).
708 ustring zeroPad (int n, const ustring& src) {
// The requested width is capped at 32.
711 n = std::min (32, n);
// m: number of pad characters needed.
712 m = n - src.length ();
// wsearch -- regular-expression search on wide strings: text and the pattern
// reg are converted with utow(), compiled with reg_flags, and searched with
// search_flags. Returns the result of regex_search; m receives the match.
// A boost::regex_error is rethrown as uErrorRegexp.
// NOTE(review): m refers into the local wtext, which goes out of scope on
// return -- confirm that callers do not read sub-matches afterwards.
724 bool wsearch (const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
726 std::wstring wtext = utow (text);
727 std::wstring wreg = utow (reg);
728 boost::wregex wre (wreg, reg_flags);
729 return regex_search (wtext, m, wre, search_flags);
730 } catch (boost::regex_error& err) {
731 throw (uErrorRegexp);
// wsearch_env -- same search as wsearch, but the wide copy of text is stored in
// mlenv->env->regtext, so it is not a local of this function.
// A boost::regex_error is rethrown as uErrorRegexp.
735 bool wsearch_env (MlEnv* mlenv, const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
737 mlenv->env->regtext = utow (text);
738 std::wstring wreg = utow (reg);
739 boost::wregex wre (wreg, reg_flags);
740 return regex_search (mlenv->env->regtext, m, wre, search_flags);
741 } catch (boost::regex_error& err) {
742 throw (uErrorRegexp);
// uiconv -- converts src from encoding fromcode to encoding tocode with iconv.
// Throws ustring ("bad encoding name.") when iconv_open rejects the pair.
// Converted bytes are appended to the result from a working buffer buf; the
// surrounding loop, iconv error handling and iconv_close are in lines not shown.
746 ustring uiconv (const ustring& src, const char* tocode, const char* fromcode) {
751 size_t isize, osize, rsize;
754 cd = iconv_open (tocode, fromcode);
755 if (cd == (iconv_t)(-1))
756 throw (ustring ("bad encoding name."));
762 rsize = iconv (cd, &ibuf, &isize, &obuf, &osize);
// obuf has been advanced by iconv; obuf - buf is the number of bytes produced.
766 ans.append (buf, obuf - buf);
// padEmpty -- returns the placeholder "(null)" in one branch; presumably that
// branch is taken when name is empty and name is returned otherwise -- TODO
// confirm (the condition is not shown).
772 ustring padEmpty (const ustring& name) {
774 return ustring (CharConst ("(null)"));
// hextoul -- parses up to 8 hexadecimal digits from [b, e) into a 32-bit value,
// most significant digit first. Characters are converted with hex(); no
// validation is visible in this block.
779 uint32_t hextoul (uiterator b, uiterator e) {
// Stops after 8 digits (32 bits) or at the end of the range.
783 for (n = 0; n < 8 && b != e; n ++, b ++) {
784 ans = (ans << 4) + hex (*b);
// toCRLF -- replaces every match of re_lf (defined outside this block) with
// uCRLF, copying the text in between. Tail handling is in lines not shown.
789 ustring toCRLF (const ustring& str) {
790 uiterator b = str.begin ();
791 uiterator e = str.end ();
795 while (usearch (b, e, m, re_lf)) {
// Text before the match, then the replacement line ending.
796 ans.append (b, m[0].first).append (uCRLF);
// skipSpace -- loops while b is before e and *b is a space character (' ');
// b is taken by reference and presumably advanced in the loop body (not shown).
// Only ' ' is tested -- tabs and other whitespace end the loop.
803 void skipSpace (uiterator& b, uiterator e) {
804 while (b < e && *b == ' ') {
// toLower_ustring_value -- maps ASCII 'A'..'Z' to 'a'..'z'. The value returned
// for other inputs is in lines not shown (presumably v unchanged).
809 static ustring::value_type toLower_ustring_value (ustring::value_type v) {
810 if ('A' <= v && v <= 'Z') {
811 return v - 'A' + 'a';
// toLower -- lower-cases the range [*b, *e) in place using toLower_ustring_value.
818 void toLower (ustring::iterator* b, ustring::iterator* e) {
819 transform (*b, *e, *b, toLower_ustring_value);
// toLower -- returns a lower-cased copy of [b, e): each input character is
// converted with toLower_ustring_value and written through the output iterator
// i (its setup is in lines not shown).
823 ustring toLower (uiterator b, uiterator e) {
828 for (; b < e; b ++, i++) {
829 *i = toLower_ustring_value (*b);
// format_hex -- appends the value v (derived from a in lines not shown) to ans
// in hexadecimal. When par[0] is present it is parsed as a precision p and the
// "%.*X" / "%.*x" forms are used; otherwise plain "%X" / "%x". Presumably fcap
// selects the upper-case variant -- TODO confirm (the selecting test is not shown).
834 static void format_hex (ustring& ans, MNode* a, std::vector<ustring>& par, bool fcap) {
841 if (par.size () > 0) {
842 int p = strtol (par[0]);
848 ans.append (buf, snprintf (buf, 32, "%.*X", p, v));
850 ans.append (buf, snprintf (buf, 32, "%.*x", p, v));
853 ans.append (buf, snprintf (buf, 32, "%X", v));
855 ans.append (buf, snprintf (buf, 32, "%x", v));
// format_hex -- formatter-table entry point: forwards with fcap = false.
859 static void format_hex (ustring& ans, MNode* a, std::vector<ustring>& par) {
860 format_hex (ans, a, par, false);
// format_HEX -- formatter-table entry point: forwards with fcap = true.
863 static void format_HEX (ustring& ans, MNode* a, std::vector<ustring>& par) {
864 format_hex (ans, a, par, true);
// format_int_sub -- appends the integer v (derived from a in lines not shown)
// to ans in decimal, driven by the parameters in par:
//   par[0] == "comma" or "c" -> digits grouped via c3();
//   otherwise par[0] is parsed as a width/precision p, and later parameters may
//   be "clip", "0", "comma" or "c"; anything else throws par[i] + uErrorBadParam.
// With no parameters the value is appended via to_ustring().
// Presumably pad0 / "0" selects the zero-padded "%.*d" form over the
// space-padded "%*d" form, and "clip" keeps only the last p characters -- TODO
// confirm (the selecting tests are not shown).
867 static void format_int_sub (ustring& ans, MNode* a, std::vector<ustring>& par, bool pad0 = false) {
875 if (par.size () > 0) {
879 if (match (par[0], CharConst ("comma")) || match (par[0], CharConst ("c"))) {
880 ans.append (c3 (to_ustring (v)));
882 int p = strtol (par[0]);
887 for (int i = 1; i < par.size (); i ++) {
888 if (match (par[i], CharConst ("clip"))) {
890 } else if (match (par[i], CharConst ("0"))) {
892 } else if (match (par[i], CharConst ("comma")) || match (par[i], CharConst ("c"))) {
895 throw (par[i] + uErrorBadParam);
899 s = snprintf (buf, 32, "%.*d", p, v);
901 s = snprintf (buf, 32, "%*d", p, v);
// Appends the final p characters of the formatted text.
903 ans.append (buf + s - p, p);
904 else if (! fclip && fc3)
905 ans.append (c3 (ustring (buf, s)));
910 ans.append (to_ustring (v));
// format_int -- formatter-table entry point: forwards to format_int_sub with
// the default pad0 (false).
914 static void format_int (ustring& ans, MNode* a, std::vector<ustring>& par) {
915 format_int_sub (ans, a, par);
// format_int0 -- formatter-table entry point: forwards to format_int_sub with
// pad0 = true.
918 static void format_int0 (ustring& ans, MNode* a, std::vector<ustring>& par) {
919 format_int_sub (ans, a, par, true);
// format_int -- fixed-width variant: formats v (derived from a in lines not
// shown) with width/precision c using "%.*d" or "%*d", and appends exactly the
// last c characters of the formatted text. One branch appends
// to_ustring (v) instead; its condition is not shown.
// Presumably pad0 selects the zero-padded "%.*d" form -- TODO confirm.
922 static void format_int (ustring& ans, MNode* a, int c, bool pad0 = false) {
934 s = snprintf (buf, 32, "%.*d", c, v);
936 s = snprintf (buf, 32, "%*d", c, v);
938 ans.append (buf + s - c, c);
942 ans.append (to_ustring (v));
// format_float -- appends to_double (a) formatted with "%*.*lf": p1 is the
// field width (from par[0]) and p2 the number of decimals (from par[1]).
// Defaults and bounds for p1/p2 are in lines not shown.
946 static void format_float (ustring& ans, MNode* a, std::vector<ustring>& par) {
952 p1 = strtol (par[0]);
954 p2 = strtol (par[1]);
963 ans.append (buf, snprintf (buf, 32, "%*.*lf", p1, p2, to_double (a)));
// format_string -- appends to_string (a) padded with spaces to a width p
// (presumably parsed from par[0] -- not shown). par[1] may be "right" or "r";
// any other value throws par[1] + uErrorBadParam. The two visible appends put
// the padding before the text (right-aligned) or after it (left-aligned).
// NOTE(review): p - u.size () must be non-negative here; the guard is
// presumably in lines not shown -- confirm.
966 static void format_string (ustring& ans, MNode* a, std::vector<ustring>& par) {
969 ustring u = to_string (a);
975 if (par.size () > 1) {
976 if (match (par[1], CharConst ("right")) || match (par[1], CharConst ("r")))
979 throw (par[1] + uErrorBadParam);
983 ans.append (p - u.size (), ' ').append (u);
988 ans.append (u).append (p - u.size (), ' ');
// format_literal -- appends list[to_int (a) - offset] to ans when that index
// lies in [0, size). The append is skipped for out-of-range values.
994 static void format_literal (ustring& ans, MNode* a, const char* list[], int offset, size_t size) {
998 v = to_int (a) - offset;
999 if (0 <= v && v < size)
1000 ans.append (list[v]);
// format_month -- appends the abbreviated English month name for a 1-based
// month number (1 -> "Jan" ... 12 -> "Dec"). par is not used in the visible lines.
1004 static void format_month (ustring& ans, MNode* a, std::vector<ustring>& par) {
1005 static const char* mstr_a[] = {
1006 "Jan", "Feb", "Mar", "Apr",
1007 "May", "Jun", "Jul", "Aug",
1008 "Sep", "Oct", "Nov", "Dec"
1010 format_literal (ans, a, mstr_a, 1, 12);
// format_Month -- appends the full English month name for a 1-based month
// number (1 -> "January" ... 12 -> "December"). par is not used in the visible lines.
1013 static void format_Month (ustring& ans, MNode* a, std::vector<ustring>& par) {
1014 static const char* mstr[] = {
1015 "January", "February", "March", "April",
1016 "May", "June", "July", "August",
1017 "September", "October", "November", "December"
1019 format_literal (ans, a, mstr, 1, 12);
// format_week -- appends the abbreviated English weekday name for a 0-based
// day number starting at "Sun" (0); seven entries are indexed. The rest of the
// table is in lines not shown.
1022 static void format_week (ustring& ans, MNode* a, std::vector<ustring>& par) {
1023 static const char* wstr_a[] = {
1024 "Sun", "Mon", "Tue", "Wed",
1027 format_literal (ans, a, wstr_a, 0, 7);
// format_Week -- appends the full English weekday name for a 0-based day
// number (0 -> "Sunday" ... 6 -> "Saturday"). par is not used in the visible lines.
1030 static void format_Week (ustring& ans, MNode* a, std::vector<ustring>& par) {
1031 static const char* wstr[] = {
1032 "Sunday", "Monday", "Tuesday", "Wednesday",
1033 "Thursday", "Friday", "Saturday"
1035 format_literal (ans, a, wstr, 0, 7);
// formatString -- expands placeholders in format using the values in par.
// Placeholder forms: ${N}, ${N:name} and ${N:name:params}, where N is a
// 1-based index into par, name selects a formatter from the table below, and
// params is a ':'-separated list passed to that formatter.
// Text outside placeholders is copied through. One branch copies the
// placeholder text itself unchanged; its condition is not shown (presumably an
// unknown formatter name or out-of-range index -- TODO confirm).
1038 ustring formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
// Group 1: index. Group 3: formatter name. Group 5: parameter list.
1044 static uregex re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
// Formatter signature shared by all table entries.
1048 void (*fn)(ustring& ans, MNode* a, std::vector<ustring>& par);
// Formatter table, terminated by an entry whose name is null (see the lookup loop).
1050 {CharConst ("hex"), format_hex},
1051 {CharConst ("HEX"), format_HEX},
1052 {CharConst ("int"), format_int},
1053 {CharConst ("int0"), format_int0},
1054 {CharConst ("float"), format_float},
1055 {CharConst ("string"), format_string},
1056 {CharConst ("month"), format_month},
1057 {CharConst ("Month"), format_Month},
1058 {CharConst ("week"), format_week},
1059 {CharConst ("Week"), format_Week},
1063 b = format.begin ();
1065 while (usearch (b, e, m, re)) {
// Literal text before the placeholder.
1066 ans.append (b, m[0].first);
// Convert the 1-based placeholder index to a 0-based index into par.
1068 i = strtoul (ustring (m[1].first, m[1].second)) - 1;
1069 if (i < par.size ()) {
// No formatter name: append the plain string form of the value.
1074 if (! m[2].matched) {
1076 ans.append (to_string (a));
1078 std::vector<ustring> fpar;
// Split the parameter list on re_colon (defined outside this block).
1081 split (m[5].first, m[5].second, re_colon, fpar);
// Linear lookup of the formatter by exact name.
1082 for (i = 0; formatFunc[i].name; i ++) {
1083 if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) {
1084 (*formatFunc[i].fn) (ans, a, fpar);
1088 ans.append (m[0].first, m[0].second);
// formatDateString -- expands date placeholders in format. Placeholder forms:
// ${X} and ${X:d}, where X is one of Y M D h m s W w and d is a single digit.
// The letter selects which value is used (the switch cases are not shown).
// Without a digit: two branches call format_Week and format_week (presumably
// for 'W' and 'w' -- the case labels are not shown) and another appends
// to_string (a). With a digit: the value is formatted by format_int with that
// width and pad0 = true.
// NOTE(review): this block continues past the last visible line.
1106 ustring formatDateString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
// Group 1: field letter. Group 3: optional single-digit width.
1112 static uregex re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
1114 b = format.begin ();
1116 while (usearch (b, e, m, re)) {
// Empty parameter list handed to the week formatters below.
1117 std::vector<ustring> fpar;
// Literal text before the placeholder.
1118 ans.append (b, m[0].first);
1120 switch (*m[1].first) {
1147 if (! m[2].matched) {
1148 switch (*m[1].first) {
1150 format_Week (ans, a, fpar);
1153 format_week (ans, a, fpar);
1157 ans.append (to_string (a));
1160 format_int (ans, a, strtol (ustring (m[3].first, m[3].second)), true);