1 #include "util_string.h"
2 #include "util_const.h"
3 #include "util_random.h"
4 #include "util_splitter.h"
8 #include <boost/regex.hpp>
9 #include <boost/regex/pattern_except.hpp>
10 #include <boost/algorithm/string.hpp>
20 UIConv::UIConv (const char* in, const char* out) {
21 cd = iconv_open (in, out);
22 if (cd == ICONV_ERR) {
23 throw (ustring (in).append (CharConst (", ")).append (ustring (out)).append (CharConst (": unknown encoding.")));
27 ustring UIConv::cv (const ustring& text) {
30 if (cd != ICONV_ERR) {
31 char* buf = new char[4096];
34 size_t isize, osize, rsize;
36 ibuf = text.begin ().base ();
42 rsize = ::iconv (cd, (char**)&ibuf, &isize, &obuf, &osize);
44 rsize = ::iconv (cd, &ibuf, &isize, &obuf, &osize);
47 if (errno == EILSEQ) {
50 ans.append (CharConst ("_"));
51 } else if (errno == EINVAL) {
52 } else if (errno == E2BIG) {
58 ans.append (buf, obuf - buf);
// Predicate: true only for the ASCII decimal digits '0' through '9'.
// Takes an int so it can be used as a bool (*)(int) callback.
static bool isDigit (int c) {
    if (c < '0' || c > '9')
        return false;
    return true;
}
69 ustring c3 (const ustring& str) {
75 if (str[0] == '-' || str[0] == '+') {
80 if (matchHeadFn (t, e, isDigit)) {
82 int l = str.size () + n / 3;
86 ans.append (1, str[0]);
90 if (n > 1 && n % 3 == 1) {
91 ans.append (CharConst (","));
95 for (; b != e; b ++) {
104 ustring to_ustring (double val) {
106 return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
109 static int hex (char c) {
110 if ('0' <= c && c <= '9') {
112 } else if ('a' <= c && c <= 'f') {
113 return (c - 'a' + 10);
114 } else if ('A' <= c && c <= 'F') {
115 return (c - 'A' + 10);
121 static int hex (char c1, char c2) {
122 return (hex (c1) * 16 + hex (c2));
125 static char hexchar (int c) {
126 if (0 <= c && c <= 9)
128 else if (10 <= c && c <= 15)
134 static char hexchar_c (int c) {
135 if (0 <= c && c <= 9)
137 else if (10 <= c && c <= 15)
143 static ustring percentHex (int c) {
144 ustring ans (3, '%');
146 ans[1] = hexchar ((c >> 4) & 0x0f);
147 ans[2] = hexchar (c & 0x0f);
151 ustring percentHEX (int c) {
152 ustring ans (3, '%');
154 ans[1] = hexchar_c ((c >> 4) & 0x0f);
155 ans[2] = hexchar_c (c & 0x0f);
159 ustring urldecode_nonul (const ustring& str) {
161 static uregex re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
165 ans.reserve (str.size ());
168 while (usearch (b, e, m, re)) {
169 if (b != m[0].first) {
170 ans.append (b, m[0].first);
174 } else if (m[2].matched) {
175 int v = hex (*(m[2].first), *(m[2].first + 1));
189 static ustring omitPattern (const ustring& text, int (*fn)(int)) {
190 uiterator b = text.begin ();
191 uiterator e = text.end ();
193 for (; p < e; ++ p) {
201 ans.reserve (text.length ());
204 for (; p < e; ++ p) {
212 ustring omitCtrl (const ustring& str) {
213 return omitPattern (str, iscntrl);
216 static int iscntrlx (int c) {
217 static char table_ctrlx[] = {
218 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
219 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
227 if (0 <= c && c < 128)
228 return table_ctrlx[c];
232 ustring omitCtrlX (const ustring& str) {
233 return omitPattern (str, iscntrlx);
236 static int isNUL (int c) {
240 ustring omitNul (const ustring& str) {
241 return omitPattern (str, isNUL);
// Predicate for omitPattern (): non-zero when c is a line feed (0x0a) or a
// carriage return (0x0d), zero otherwise.
static int iscrlfchar (int c) {
    switch (c) {
    case 0x0a:
    case 0x0d:
        return 1;
    default:
        return 0;
    }
}
248 ustring omitNL (const ustring& str) {
249 return omitPattern (str, iscrlfchar);
// Predicate for omitPattern (): non-zero when c lies outside the range
// 0x20 (space) .. 0x7e ('~'), i.e. below 0x20 (including negative values)
// or above 0x7e.
static int isnonasciichar (int c) {
    const bool inRange = (0x20 <= c && c <= 0x7e);
    return inRange ? 0 : 1;
}
256 ustring omitNonAscii (const ustring& str) {
257 return omitPattern (str, isnonasciichar);
// Predicate for omitPattern (): non-zero when c lies outside the range
// 0x21 ('!') .. 0x7e ('~').  Unlike isnonasciichar (), the space character
// (0x20) is also reported.
static int isnonasciiword (int c) {
    const bool inRange = (0x21 <= c && c <= 0x7e);
    return inRange ? 0 : 1;
}
264 ustring omitNonAsciiWord (const ustring& str) {
265 return omitPattern (str, isnonasciiword);
268 static ustring percentEncode (Splitter& sp) {
271 while (sp.nextSep ()) {
272 if (sp.preSize () > 0)
273 ans.append (sp.pre ());
274 c = *sp.matchBegin ();
276 ans.append (uUScore);
278 ans.append (percentHEX (c));
281 if (sp.preSize () > 0)
282 ans.append (sp.pre ());
286 static bool findPercentChar (uiterator& b, uiterator e, uiterator& u) {
287 static char table_percentchar[] = { // (\x00)|([^A-Za-z0-9_.~\-])
288 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
289 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
290 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
291 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
292 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
293 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
294 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
295 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
298 for (; b < e; ++ b) {
300 if (c < 0 || c >= 128 || table_percentchar[c]) {
309 ustring percentEncode (uiterator b, uiterator e) {
310 // static uregex re ("(\\x00)|([^A-Za-z0-9_.~-])");
311 SplitterFn sp (b, e, findPercentChar);
312 return percentEncode (sp);
315 static bool findPercentPathChar (uiterator& b, uiterator e, uiterator& u) {
316 static char table_percentpathchar[] = { // (\x00)|([^A-Za-z0-9_\/.~\-])
317 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
318 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
321 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
322 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
323 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
327 for (; b < e; ++ b) {
329 if (c < 0 || c >= 128 || table_percentpathchar[c]) {
338 ustring percentEncode_path (uiterator b, uiterator e) {
339 // static uregex re ("(\\x00)|([^A-Za-z0-9_/.~-])");
340 SplitterFn sp (b, e, findPercentPathChar);
341 return percentEncode (sp);
344 ustring percentDecode (const ustring& str) {
346 static uregex re ("%([0-9a-fA-F][0-9a-fA-F])|\\x00");
352 while (usearch (b, e, m, re)) {
353 if (b != m[0].first) {
354 ans.append (b, m[0].first);
357 int v = hex (*(m[1].first), *(m[1].first + 1));
368 return fixUTF8 (ans);
371 static bool findCookieEncChar (uiterator& b, uiterator e, uiterator& u) {
372 static char table_cookieencode[] = { // ([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])
373 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
374 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
375 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
376 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
378 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
383 for (; b < e; ++ b) {
385 if (c < 0 || c >= 128 || table_cookieencode[c]) {
394 ustring cookieencode (const ustring& text) {
395 // static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
396 SplitterFn sp (text.begin (), text.end (), findCookieEncChar);
397 return percentEncode (sp);
400 ustring cookiedecode (const ustring& text) {
405 static uregex re ("%([0-9a-fA-F])([0-9a-fA-F])");
409 while (usearch (b, e, m, re)) {
411 ans.append (ustring (b, m[0].first));
412 a = hex (*m[1].first, *m[2].first);
417 ans.append (ustring (b, e));
422 ustring clipColon (const ustring& text) {
426 for (i = 0; i < ans.size (); i ++) {
433 ustring dirPart (const ustring& path) {
434 ustring::size_type s = path.rfind ('/', path.size ());
436 if (s == ustring::npos) {
440 return ustring (path.begin (), path.begin () + s);
444 ustring filePart_osSafe (const ustring& path) {
446 static uregex re ("[^\\\\/]+$");
448 if (usearch (path, m, re)) {
449 return ustring (m[0].first, m[0].second);
455 void split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
456 SplitterRe sp (b, e, re);
459 ans.push_back (sp.pre ());
463 void split (uiterator b, uiterator e, int ch, std::vector<ustring>& ans) {
464 SplitterCh sp (b, e, ch);
467 ans.push_back (sp.pre ());
471 void splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
472 SplitterRe sp (b, e, re);
475 while (sp.nextSep ()) {
476 ans.push_back (sp.pre ());
478 ans.push_back (sp.pre ());
482 void splitE (uiterator b, uiterator e, int ch, std::vector<ustring>& ans) {
483 SplitterCh sp (b, e, ch);
486 while (sp.nextSep ()) {
487 ans.push_back (sp.pre ());
489 ans.push_back (sp.pre ());
493 bool splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
494 for (; b < e; b ++) {
504 ustring escape_re (const ustring& text) {
505 ustring::const_iterator b, e;
510 static uregex re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
514 ans.reserve (text.size () + 16);
517 while (b != e && usearch (b, e, m, re)) {
519 ans.append (b, m[0].first);
521 buf[2] = hexchar ((c >> 4) & 0x0f);
522 buf[3] = hexchar (c & 0x0f);
531 ustring slashEncode (const ustring& text) {
532 ustring::const_iterator b, e;
537 static uregex re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
543 while (b != e && usearch (b, e, m, re)) {
545 ans.append (b, m[0].first);
550 ans.append (CharConst ("\\t"));
553 ans.append (CharConst ("\\r"));
556 ans.append (CharConst ("\\n"));
559 buf[2] = hexchar ((c >> 4) & 0x0f);
560 buf[3] = hexchar (c & 0x0f);
563 } else if (m[2].matched) {
564 ans.append (CharConst ("\\\\"));
565 } else if (m[3].matched) {
566 ans.append (CharConst ("\\\""));
577 ustring slashDecode (const ustring& text) {
578 ustring::const_iterator b, e;
582 static uregex re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
586 while (b != e && usearch (b, e, m, re)) {
588 ans.append (b, m[0].first);
593 ans.append (CharConst ("\t"));
596 ans.append (CharConst ("\r"));
599 ans.append (CharConst ("\n"));
602 if (m[0].second - m[0].first == 4) {
608 if (0 < c && c < 0x20)
621 unsigned long strtoul (const ustring& str) {
622 return strtoul (str.c_str (), NULL, 10);
625 unsigned long strtoul (const uiterator& b) {
626 return strtoul (&*b, NULL, 10);
629 long strtol (const ustring& str) {
630 return strtol (str.c_str (), NULL, 10);
633 double strtod (const ustring& str) {
634 return strtod (str.c_str (), NULL);
637 bool passMatch (const ustring& pass, const ustring& cpass) {
638 if (pass.length () == 0 || cpass.length () == 0)
640 return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
643 ustring passCrypt (const ustring& pass) {
644 ustring salt = makeSalt ();
645 return ustring (crypt (pass.c_str (), salt.c_str ()));
648 size_t strLength (const ustring& src) {
660 void substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
666 for (i = 0; i < idx && b < e; i ++)
670 for (i = 0; i < len && t < e; i ++)
678 static bool jssafe[] = {
679 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0--15
680 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 16--31
681 1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1, // 32--47
682 1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0, // 48--63
683 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 64--79
684 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 80--95
685 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 96--111
686 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 112--127
689 ustring jsEncode (const ustring& str) {
696 ans.reserve (u.size () * 3);
699 for (i = 0; i < u.size (); i += 2) {
702 if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
705 b[2] = hexchar ((c >> 4) & 0x0f);
706 b[3] = hexchar (c & 0x0f);
707 b[4] = hexchar ((d >> 4) & 0x0f);
708 b[5] = hexchar (d & 0x0f);
715 ustring filenameEncode (const ustring& text) {
716 static uregex re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
717 SplitterRe sp (text, re);
721 if (text.length () == 0) {
722 throw (ustring (text).append (uErrorBadName));
724 ans.reserve (text.length () + 16);
726 if (sp.begin () < sp.end ())
727 ans.append (sp.begin (), sp.end ());
729 } else if (sp.match (2)) {
730 c = *sp.matchBegin (2);
732 ans.append (1, hexchar ((c >> 4) & 0x0f));
733 ans.append (1, hexchar (c & 0x0f));
734 } else if (sp.match (3)) {
735 for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
736 ans.append (CharConst (":2e"));
740 if (ans.length () > 250)
745 ustring filenameDecode (const ustring& text) {
746 static uregex re (":([0-9a-fA-F][0-9a-fA-F])");
747 SplitterRe sp (text, re);
751 ans.reserve (text.length ());
753 if (sp.begin () < sp.end ())
754 ans.append (sp.begin (), sp.end ());
756 c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
757 if (32 <= c && c < 256)
764 bool matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
765 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
773 bool matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
774 if (e - b >= s && memcmp (t, &b[0], s) == 0) {
781 bool matchHead (const ustring& str, const char* t, size_t s) {
782 if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
789 bool matchHead (const ustring& str, const ustring& head) {
790 if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
797 bool match (uiterator b, uiterator e, const char* t, size_t s) {
798 if (e - b == s && memcmp (t, &b[0], s) == 0) {
805 bool match (const ustring& str, const char* t, size_t s) {
806 if (str.length () == s && memcmp (t, str.data (), s) == 0) {
813 bool match (uiterator b, uiterator e, const ustring& str) {
814 if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
821 bool match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
822 if (match (str, t, s) || match (str, t2, s2)) {
829 ustring clipWhite (uiterator b, uiterator e) {
837 if (isblank (*(e - 1))) {
842 return ustring (b, e);
844 ustring clipWhite (const ustring& str) {
845 return clipWhite (str.begin (), str.end ());
848 ustring getenvString (const char* key) {
849 char* e = getenv (key);
857 ustring zeroPad (int n, const ustring& src) {
860 n = std::min (32, n);
861 m = n - src.length ();
873 ustring padEmpty (const ustring& name) {
875 return ustring (CharConst ("(null)"));
880 uint32_t hextoul (uiterator b, uiterator e) {
884 for (n = 0; n < 8 && b != e; n ++, b ++) {
885 ans = (ans << 4) + hex (*b);
890 ustring toCRLF (const ustring& str) {
891 uiterator b = str.begin ();
892 uiterator e = str.end ();
897 while (findChar (b, e, '\n')) {
898 ans.append (p, b).append (uCRLF);
906 void skipChar (uiterator& b, uiterator e, int ch) {
907 while (b < e && *b == ch)
911 void skipNextToChar (uiterator& b, uiterator e, int ch) {
918 static ustring::value_type toLower_ustring_value (ustring::value_type v) {
919 if ('A' <= v && v <= 'Z') {
920 return v - 'A' + 'a';
926 ustring toLower (uiterator b, uiterator e) {
931 for (; b < e; b ++, i++) {
932 *i = toLower_ustring_value (*b);
937 static ustring colpad0 (int n, const ustring& src) {
941 n = std::min (32, n);
942 m = n - src.length ();
952 return ustring (src.end () - n, src.end ());
961 ${M:2}, ${M}, ${M:name}, ${M:ab}
969 ustring formatDateString (const ustring& format, struct tm& v) {
974 static uregex re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}");
975 std::vector<ustring> fpar;
979 while (usearch (b, e, m, re)) {
980 ans.append (b, m[0].first);
983 if (m[6].matched) { // name
984 ans.append (MStr[v.tm_mon]);
985 } else if (m[7].matched || m[8].matched) { // abname
986 ans.append (MStr_a[v.tm_mon]);
990 pc = strtol (ustring (m[4].first, m[4].second));
994 switch (*m[2].first) {
996 ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900)));
999 ans.append (colpad0 (pc, to_ustring (v.tm_mon + 1)));
1002 ans.append (colpad0 (pc, to_ustring (v.tm_mday)));
1005 ans.append (colpad0 (pc, to_ustring (v.tm_hour)));
1008 ans.append (colpad0 (pc, to_ustring (v.tm_min)));
1011 ans.append (colpad0 (pc, to_ustring (v.tm_sec)));
1014 ans.append (WStr [v.tm_wday]);
1017 ans.append (WStr_a [v.tm_wday]);
1022 if (v.tm_gmtoff < 0) {
1023 h = - v.tm_gmtoff / 60;
1026 ans.append (CharConst ("-")).append (colpad0 (4, to_ustring (h * 100 + m)));
1028 h = v.tm_gmtoff / 60;
1031 ans.append (CharConst ("+")).append (colpad0 (4, to_ustring (h * 100 + m)));
1043 ustring toLower (const ustring& str) {
1044 return boost::to_lower_copy (str);
1047 ustring toUpper (const ustring& str) {
1048 return boost::to_upper_copy (str);
1051 ustring hexEncode (const ustring& data) {
1055 ans.reserve (data.length () * 2);
1058 for (; b < e; b ++) {
1059 ans.append (1, hexchar ((*b >> 4) & 0x0f));
1060 ans.append (1, hexchar (*b & 0x0f));
1065 int octchar (uiterator b) { // 3bytes
1069 ans = ans * 8 + *b - '0';
1071 ans = ans * 8 + *b - '0';
1075 ustring octchar (int c) {
1077 ans[2] = (c & 0x7) + '0';
1079 ans[1] = (c & 0x7) + '0';
1081 ans[0] = (c & 0x3) + '0';
1085 bool findNL (uiterator& b, uiterator e, uiterator& u) {
1086 for (; b < e; ++ b) {
1090 } else if (*b == '\r') {
1092 if (u < e && *u == '\n')
1101 bool findNLb (uiterator& b, uiterator e) {
1102 for (; b < e; ++ b) {
1106 } else if (*b == '\r') {
1108 if (b < e && *b == '\n')
1116 bool findChar (uiterator& b, uiterator e, int ch) {
1117 for (; b < e; ++ b) {
1125 bool findChars (uiterator& b, uiterator e, const ustring& pattern) {
1126 for (; b < e; ++ b) {
1127 if (pattern.find (*b) != ustring::npos) {
1134 bool findCharFn (uiterator& b, uiterator e, bool (*fn)(int)) {
1135 for (; b < e; ++ b) {
1142 bool findSepColon (uiterator& b, uiterator e, uiterator& u) {
1143 // " *; *"を探索する。bは進む
1145 if (findChar (b, e, ';')) {
1147 while (p < b && *(b - 1) == ' ')
1149 while (u < e && *u == ' ')
1157 bool matchHeadFn (uiterator& b, uiterator e, bool (*fn)(int)) {
1158 if (b < e && fn (*b)) {
1161 } while (b < e && fn (*b));
1167 bool matchWordTbl (uiterator b, uiterator e, char* tbl) {
1172 if (0 <= c && c < 128 && tbl[c]) { // 128〜はfalse
1184 bool matchWordFn (uiterator b, uiterator e, bool (*fn)(int)) {
1189 if (0 <= c && c < 128 && fn (c)) {