lib/util_string.cc

   1 #include "util_string.h"
   2 #include "util_const.h"
   3 #include "util_random.h"
   4 #include "util_splitter.h"
   5 #include "ustring.h"
   6 #include "utf8.h"
   7 #include "utf16.h"
   8 #include <boost/regex.hpp>
   9 #include <boost/regex/pattern_except.hpp>
  10 #include <boost/algorithm/string.hpp>
  11 #include <vector>
  12 #include <algorithm>
  13 #include <stdlib.h>
  14 #include <unistd.h>
  15 #include <string.h>
  16 #include <time.h>
  17 #include <float.h>
  18 #include <ctype.h>
  19
  20 UIConv::UIConv (const char* in, const char* out) {
  21     cd = iconv_open (in, out);
  22     if (cd == ICONV_ERR) {
  23         throw (ustring (in).append (CharConst (", ")).append (ustring (out)).append (CharConst (": unknown encoding.")));
  24     }
  25 }
  26
  27 ustring  UIConv::cv (const ustring& text) {
  28     ustring  ans;
  29
  30     if (cd != ICONV_ERR) {
  31         char*  buf = new char[4096];
  32         const char*  ibuf;
  33         char*  obuf;
  34         size_t  isize, osize, rsize;
  35
  36         ibuf = text.begin ().base ();
  37         isize = text.size ();
  38         while (isize > 0) {
  39             obuf = buf;
  40             osize = 4096;
  41 #ifdef Linux
  42             rsize = ::iconv (cd, (char**)&ibuf, &isize, &obuf, &osize);
  43 #else
  44             rsize = ::iconv (cd, &ibuf, &isize, &obuf, &osize);
  45 #endif
  46             if (rsize == -1) {
  47                 if (errno == EILSEQ) {
  48                     ibuf ++;
  49                     isize --;
  50                     ans.append (CharConst ("_"));
  51                 } else if (errno == EINVAL) {
  52                 } else if (errno == E2BIG) {
  53                 } else {
  54                     break;
  55                 }
  56             }
  57             if (obuf > buf)
  58                 ans.append (buf, obuf - buf);
  59         }
  60         delete buf;
  61     }
  62     return ans;
  63 }
  64
  65 static bool  isDigit (int c) {
  66     return '0' <= c && c <= '9';
  67 }
  68
  69 ustring  c3 (const ustring& str) {
  70     bool  qsign = false;
  71     uiterator  b, e, t;
  72
  73     b = str.begin ();
  74     e = str.end ();
  75     if (str[0] == '-' || str[0] == '+') {
  76         qsign = true;
  77         b = b + 1;
  78     }
  79     t = b;
  80     if (matchHeadFn (t, e, isDigit)) {
  81         int  n = t - b;
  82         int  l = str.size () + n / 3;
  83         ustring  ans;
  84         ans.reserve (l);
  85         if (qsign) {
  86             ans.append (1, str[0]);
  87         }
  88         for (; b < t; ++ b) {
  89             ans.append (1, *b);
  90             if (n > 1 && n % 3 == 1) {
  91                 ans.append (CharConst (","));
  92             }
  93             n --;
  94         }
  95         for (; b != e; b ++) {
  96             ans.append (1, *b);
  97         }
  98         return ans;
  99     } else {
 100         return str;
 101     }
 102 }
 103
 104 ustring  to_ustring (double val) {
 105     char  b[32];
 106     return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
 107 }
 108
 109 static int  hex (char c) {
 110     if ('0' <= c && c <= '9') {
 111         return (c - '0');
 112     } else if ('a' <= c && c <= 'f') {
 113         return (c -  'a' + 10);
 114     } else if ('A' <= c && c <= 'F') {
 115         return (c - 'A' + 10);
 116     } else {
 117         return 0;
 118     }
 119 }
 120
 121 static int  hex (char c1, char c2) {
 122     return (hex (c1) * 16 + hex (c2));
 123 }
 124
 125 static char  hexchar (int c) {
 126     if (0 <= c && c <= 9)
 127         return '0' + c;
 128     else if (10 <= c && c <= 15)
 129         return 'a' - 10 + c;
 130     else
 131         return '0';
 132 }
 133
 134 static char  hexchar_c (int c) {
 135     if (0 <= c && c <= 9)
 136         return '0' + c;
 137     else if (10 <= c && c <= 15)
 138         return 'A' - 10 + c;
 139     else
 140         return '0';
 141 }
 142
 143 static ustring  percentHex (int c) {
 144     ustring  ans (3, '%');
 145
 146     ans[1] = hexchar ((c >> 4) & 0x0f);
 147     ans[2] = hexchar (c & 0x0f);
 148     return ans;
 149 }
 150
 151 ustring  percentHEX (int c) {
 152     ustring  ans (3, '%');
 153
 154     ans[1] = hexchar_c ((c >> 4) & 0x0f);
 155     ans[2] = hexchar_c (c & 0x0f);
 156     return ans;
 157 }
 158
 159 ustring  urldecode_nonul (const ustring& str) {
 160     ustring  ans;
 161     static uregex  re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
 162     umatch  m;
 163     uiterator  b, e;
 164
 165     ans.reserve (str.size ());
 166     b = str.begin ();
 167     e = str.end ();
 168     while (usearch (b, e, m, re)) {
 169         if (b != m[0].first) {
 170             ans.append (b, m[0].first);
 171         }
 172         if (m[1].matched) {
 173             ans.append (1, ' ');
 174         } else if (m[2].matched) {
 175             int  v = hex (*(m[2].first), *(m[2].first + 1));
 176             if (v != 0)
 177                 ans.append (1, v);
 178         } else {
 179         }
 180         b = m[0].second;
 181     }
 182     if (b != e) {
 183         ans.append (b, e);
 184     }
 185
 186     return ans;
 187 }
 188
 189 static ustring  omitPattern (const ustring& text, int (*fn)(int)) {
 190     uiterator  b = text.begin ();
 191     uiterator  e = text.end ();
 192     uiterator  p = b;
 193     for (; p < e; ++ p) {
 194         if (fn (*p))
 195             break;
 196     }
 197     if (p == e) {
 198         return text;
 199     } else {
 200         ustring  ans;
 201         ans.reserve (text.length ());
 202         ans.assign (b, p);
 203         ++ p;
 204         for (; p < e; ++ p) {
 205             if (! fn (*p))
 206                 ans.append (1, *p);
 207         }
 208         return ans;
 209     }
 210 }
 211
 212 ustring  omitCtrl (const ustring& str) {
 213     return omitPattern (str, iscntrl);
 214 }
 215
 216 static int  iscntrlx (int c) {
 217     static char  table_ctrlx[] = {
 218         1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
 219         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 220         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 221         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 222         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 223         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 224         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 225         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 226     };
 227     if (0 <= c && c < 128)
 228         return table_ctrlx[c];
 229     return 0;
 230 }
 231
 232 ustring  omitCtrlX (const ustring& str) {
 233     return omitPattern (str, iscntrlx);
 234 }
 235
 236 static int  isNUL (int c) {
 237     return c == 0;
 238 }
 239
 240 ustring  omitNul (const ustring& str) {
 241     return omitPattern (str, isNUL);
 242 }
 243
 244 static int  iscrlfchar (int c) {
 245     return c == 0x0a || c == 0x0d;
 246 }
 247
 248 ustring  omitNL (const ustring& str) {
 249     return omitPattern (str, iscrlfchar);
 250 }
 251
 252 static int  isnonasciichar (int c) {
 253     return c < 0x20 || c > 0x7e;
 254 }
 255
 256 ustring  omitNonAscii (const ustring& str) {
 257     return omitPattern (str, isnonasciichar);
 258 }
 259
 260 static int  isnonasciiword (int c) {
 261     return c < 0x21 || c > 0x7e;
 262 }
 263
 264 ustring  omitNonAsciiWord (const ustring& str) {
 265     return omitPattern (str, isnonasciiword);
 266 }
 267
 268 static ustring  percentEncode (Splitter& sp) {
 269     ustring  ans;
 270     int  c;
 271     while (sp.nextSep ()) {
 272         if (sp.preSize () > 0)
 273             ans.append (sp.pre ());
 274         c = *sp.matchBegin ();
 275         if (c == '\0') {
 276             ans.append (uUScore);
 277         } else {
 278             ans.append (percentHEX (c));
 279         }
 280     }
 281     if (sp.preSize () > 0)
 282         ans.append (sp.pre ());
 283     return ans;
 284 }
 285
 286 static bool  findPercentChar (uiterator& b, uiterator e, uiterator& u) {
 287     static char  table_percentchar[] = {                // (\x00)|([^A-Za-z0-9_.~\-])
 288         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 289         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 290         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
 291         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 292         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 293         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 294         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 295         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
 296     };
 297     int  c;
 298     for (; b < e; ++ b) {
 299         c = *b;
 300         if (c < 0 || c >= 128 || table_percentchar[c]) {
 301             u = b + 1;
 302             return true;
 303         }
 304     }
 305     u = e;
 306     return false;
 307 }
 308
 309 ustring  percentEncode (uiterator b, uiterator e) {
 310 //    static uregex  re ("(\\x00)|([^A-Za-z0-9_.~-])");
 311     SplitterFn  sp (b, e, findPercentChar);
 312     return percentEncode (sp);
 313 }
 314
 315 static bool  findPercentPathChar (uiterator& b, uiterator e, uiterator& u) {
 316     static char  table_percentpathchar[] = {            // (\x00)|([^A-Za-z0-9_\/.~\-])
 317         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 318         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 319         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
 320         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
 321         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 322         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
 323         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 324         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
 325     };
 326     int  c;
 327     for (; b < e; ++ b) {
 328         c = *b;
 329         if (c < 0 || c >= 128 || table_percentpathchar[c]) {
 330             u = b + 1;
 331             return true;
 332         }
 333     }
 334     u = e;
 335     return false;
 336 }
 337
 338 ustring  percentEncode_path (uiterator b, uiterator e) {
 339 //    static uregex  re ("(\\x00)|([^A-Za-z0-9_/.~-])");
 340     SplitterFn  sp (b, e, findPercentPathChar);
 341     return percentEncode (sp);
 342 }
 343
 344 ustring  percentDecode (const ustring& str) {
 345     ustring  ans;
 346     static uregex  re ("%([0-9a-fA-F][0-9a-fA-F])|\\x00");
 347     umatch  m;
 348     uiterator  b, e;
 349
 350     b = str.begin ();
 351     e = str.end ();
 352     while (usearch (b, e, m, re)) {
 353         if (b != m[0].first) {
 354             ans.append (b, m[0].first);
 355         }
 356         if (m[1].matched) {
 357             int  v = hex (*(m[1].first), *(m[1].first + 1));
 358             if (v != 0)
 359                 ans.append (1, v);
 360         } else {
 361         }
 362         b = m[0].second;
 363     }
 364     if (b != e) {
 365         ans.append (b, e);
 366     }
 367
 368     return fixUTF8 (ans);
 369 }
 370
 371 static bool  findCookieEncChar (uiterator& b, uiterator e, uiterator& u) {
 372     static char  table_cookieencode[] = {               // ([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])
 373         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 374         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 375         1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
 376         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
 377         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 378         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 379         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 380         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 381     };
 382     int  c;
 383     for (; b < e; ++ b) {
 384         c = *b;
 385         if (c < 0 || c >= 128 || table_cookieencode[c]) {
 386             u = b + 1;
 387             return true;
 388         }
 389     }
 390     u = e;
 391     return false;
 392 }
 393
 394 ustring  cookieencode (const ustring& text) {
 395 //    static uregex  re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
 396     SplitterFn  sp (text.begin (), text.end (), findCookieEncChar);
 397     return percentEncode (sp);
 398 }
 399
 400 ustring  cookiedecode (const ustring& text) {
 401     umatch  m;
 402     uiterator  b, e;
 403     ustring  ans;
 404     int  a;
 405     static uregex  re ("%([0-9a-fA-F])([0-9a-fA-F])");
 406
 407     b = text.begin ();
 408     e = text.end ();
 409     while (usearch (b, e, m, re)) {
 410         if (b != m[0].first)
 411             ans.append (ustring (b, m[0].first));
 412         a = hex (*m[1].first, *m[2].first);
 413         ans.append (1, a);
 414         b = m[0].second;
 415     }
 416     if (b != e)
 417         ans.append (ustring (b, e));
 418
 419     return ans;
 420 }
 421
 422 ustring  clipColon (const ustring& text) {
 423     int  i;
 424     ustring  ans (text);
 425
 426     for (i = 0; i < ans.size (); i ++) {
 427         if (ans[i] == ':')
 428             ans[i] = '_';
 429     }
 430     return ans;
 431 }
 432
 433 ustring  dirPart (const ustring& path) {
 434     ustring::size_type  s = path.rfind ('/', path.size ());
 435
 436     if (s == ustring::npos) {
 437 //      return uSlash;
 438         return uDot;
 439     } else {
 440         return ustring (path.begin (), path.begin () + s);
 441     }
 442 }
 443
 444 ustring  filePart_osSafe (const ustring& path) {
 445     umatch  m;
 446     static uregex  re ("[^\\\\/]+$");
 447
 448     if (usearch (path, m, re)) {
 449         return ustring (m[0].first, m[0].second);
 450     } else {
 451         return uEmpty;
 452     }
 453 }
 454
 455 void  split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 456     SplitterRe  sp (b, e, re);
 457
 458     while (sp.next ()) {
 459         ans.push_back (sp.pre ());
 460     }
 461 }
 462
 463 void  split (uiterator b, uiterator e, int ch, std::vector<ustring>& ans) {
 464     SplitterCh  sp (b, e, ch);
 465
 466     while (sp.next ()) {
 467         ans.push_back (sp.pre ());
 468     }
 469 }
 470
 471 void  splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 472     SplitterRe  sp (b, e, re);
 473
 474     if (b < e) {
 475         while (sp.nextSep ()) {
 476             ans.push_back (sp.pre ());
 477         }
 478         ans.push_back (sp.pre ());
 479     }
 480 }
 481
 482 void  splitE (uiterator b, uiterator e, int ch, std::vector<ustring>& ans) {
 483     SplitterCh  sp (b, e, ch);
 484
 485     if (b < e) {
 486         while (sp.nextSep ()) {
 487             ans.push_back (sp.pre ());
 488         }
 489         ans.push_back (sp.pre ());
 490     }
 491 }
 492
 493 bool  splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
 494     for (; b < e; b ++) {
 495         if (*b == ch) {
 496             m1 = b;
 497             return true;
 498         }
 499     }
 500     m1 = e;
 501     return false;
 502 }
 503
 504 ustring  escape_re (const ustring& text) {
 505     ustring::const_iterator  b, e;
 506     umatch  m;
 507     ustring  ans;
 508     int  c;
 509     char  buf[4];
 510     static uregex  re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
 511
 512     buf[0] = '\\';
 513     buf[1] = 'x';
 514     ans.reserve (text.size () + 16);
 515     b = text.begin ();
 516     e = text.end ();
 517     while (b != e && usearch (b, e, m, re)) {
 518         if (b != m[0].first)
 519             ans.append (b, m[0].first);
 520         c = *m[0].first;
 521         buf[2] = hexchar ((c >> 4) & 0x0f);
 522         buf[3] = hexchar (c & 0x0f);
 523         ans.append (buf, 4);
 524         b = m[0].second;
 525     }
 526     if (b != e)
 527         ans.append (b, e);
 528     return ans;
 529 }
 530
 531 ustring  slashEncode (const ustring& text) {
 532     ustring::const_iterator  b, e;
 533     umatch  m;
 534     ustring  ans;
 535     int  c;
 536     char  buf[4];
 537     static uregex  re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
 538
 539     buf[0] = '\\';
 540     buf[1] = 'x';
 541     b = text.begin ();
 542     e = text.end ();
 543     while (b != e && usearch (b, e, m, re)) {
 544         if (b != m[0].first)
 545             ans.append (b, m[0].first);
 546         if (m[1].matched) {
 547             c = *m[0].first;
 548             switch (c) {
 549             case '\t':
 550                 ans.append (CharConst ("\\t"));
 551                 break;
 552             case '\r':
 553                 ans.append (CharConst ("\\r"));
 554                 break;
 555             case '\n':
 556                 ans.append (CharConst ("\\n"));
 557                 break;
 558             default:
 559                 buf[2] = hexchar ((c >> 4) & 0x0f);
 560                 buf[3] = hexchar (c & 0x0f);
 561                 ans.append (buf, 4);
 562             }
 563         } else if (m[2].matched) {
 564             ans.append (CharConst ("\\\\"));
 565         } else if (m[3].matched) {
 566             ans.append (CharConst ("\\\""));
 567         } else {
 568             assert (0);
 569         }
 570         b = m[0].second;
 571     }
 572     if (b != e)
 573         ans.append (b, e);
 574     return ans;
 575 }
 576
 577 ustring  slashDecode (const ustring& text) {
 578     ustring::const_iterator  b, e;
 579     umatch  m;
 580     ustring  ans;
 581     int  c;
 582     static uregex  re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
 583
 584     b = text.begin ();
 585     e = text.end ();
 586     while (b != e && usearch (b, e, m, re)) {
 587         if (b != m[0].first)
 588             ans.append (b, m[0].first);
 589         b = m[0].first + 1;
 590         c = *b;
 591         switch (c) {
 592         case 't':
 593             ans.append (CharConst ("\t"));
 594             break;
 595         case 'r':
 596             ans.append (CharConst ("\r"));
 597             break;
 598         case 'n':
 599             ans.append (CharConst ("\n"));
 600             break;
 601         default:
 602             if (m[0].second - m[0].first == 4) {
 603                 c = (c - '0') * 64;
 604                 b ++;
 605                 c += (*b - '0') * 8;
 606                 b ++;
 607                 c += *b - '0';
 608                 if (0 < c && c < 0x20)
 609                     ans.append (1, c);
 610             } else {
 611                 ans.append (1, c);
 612             }
 613         }
 614         b = m[0].second;
 615     }
 616     if (b != e)
 617         ans.append (b, e);
 618     return ans;
 619 }
 620
 621 unsigned long  strtoul (const ustring& str) {
 622     return strtoul (str.c_str (), NULL, 10);
 623 }
 624
 625 unsigned long  strtoul (const uiterator& b) {
 626     return strtoul (&*b, NULL, 10);
 627 }
 628
 629 long  strtol (const ustring& str) {
 630     return strtol (str.c_str (), NULL, 10);
 631 }
 632
 633 double  strtod (const ustring& str) {
 634     return strtod (str.c_str (), NULL);
 635 }
 636
 637 bool  passMatch (const ustring& pass, const ustring& cpass) {
 638     if (pass.length () == 0 || cpass.length () == 0)
 639         return false;
 640     return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
 641 }
 642
 643 ustring  passCrypt (const ustring& pass) {
 644     ustring  salt = makeSalt ();
 645     return ustring (crypt (pass.c_str (), salt.c_str ()));
 646 }
 647
 648 size_t  strLength (const ustring& src) {
 649     uiterator  b, e;
 650     size_t  n = 0;
 651     b = src.begin ();
 652     e = src.end ();
 653     while (b < e) {
 654         n ++;
 655         nextChar (b, e);
 656     }
 657     return n;
 658 }
 659
 660 void  substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
 661     uiterator  b, e, t;
 662     size_t  i;
 663
 664     b = src.begin ();
 665     e = src.end ();
 666     for (i = 0; i < idx && b < e; i ++)
 667         nextChar (b, e);
 668     if (flen) {
 669         t = b;
 670         for (i = 0; i < len && t < e; i ++)
 671             nextChar (t, e);
 672         ans.assign (b, t);
 673     } else {
 674         ans.assign (b, e);
 675     }
 676 }
 677
 678 static bool  jssafe[] = {
 679     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 0--15
 680     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 16--31
 681     1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,            // 32--47
 682     1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,            // 48--63
 683     0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 64--79
 684     1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,            // 80--95
 685     0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 96--111
 686     1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,            // 112--127
 687 };
 688
 689 ustring  jsEncode (const ustring& str) {
 690     int  i;
 691     ustring  u, ans;
 692     int  c, d;
 693     char  b[8];
 694
 695     u = utf8to16 (str);
 696     ans.reserve (u.size () * 3);
 697     b[0] = '\\';
 698     b[1] = 'u';
 699     for (i = 0; i < u.size (); i += 2) {
 700         c = u[i];
 701         d = u[i + 1];
 702         if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
 703             ans.append (1, d);
 704         } else {
 705             b[2] = hexchar ((c >> 4) & 0x0f);
 706             b[3] = hexchar (c & 0x0f);
 707             b[4] = hexchar ((d >> 4) & 0x0f);
 708             b[5] = hexchar (d & 0x0f);
 709             ans.append (b, 6);
 710         }
 711     }
 712     return ans;
 713 }
 714
 715 ustring  filenameEncode (const ustring& text) {
 716     static uregex  re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
 717     SplitterRe  sp (text, re);
 718     ustring  ans;
 719     int  c;
 720
 721     if (text.length () == 0) {
 722         throw (ustring (text).append (uErrorBadName));
 723     }
 724     ans.reserve (text.length () + 16);
 725     while (sp.next ()) {
 726         if (sp.begin () < sp.end ())
 727             ans.append (sp.begin (), sp.end ());
 728         if (sp.match (1)) {
 729         } else if (sp.match (2)) {
 730             c = *sp.matchBegin (2);
 731             ans.append (1, ':');
 732             ans.append (1, hexchar ((c >> 4) & 0x0f));
 733             ans.append (1, hexchar (c & 0x0f));
 734         } else if (sp.match (3)) {
 735             for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
 736                 ans.append (CharConst (":2e"));
 737             }
 738         }
 739     }
 740     if (ans.length () > 250)
 741         ans.resize (250);
 742     return ans;
 743 }
 744
 745 ustring  filenameDecode (const ustring& text) {
 746     static uregex  re (":([0-9a-fA-F][0-9a-fA-F])");
 747     SplitterRe  sp (text, re);
 748     ustring  ans;
 749     int  c;
 750
 751     ans.reserve (text.length ());
 752     while (sp.next ()) {
 753         if (sp.begin () < sp.end ())
 754             ans.append (sp.begin (), sp.end ());
 755         if (sp.match (1)) {
 756             c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
 757             if (32 <= c && c < 256)
 758                 ans.append (1, c);
 759         }
 760     }
 761     return ans;
 762 }
 763
 764 bool  matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
 765     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 766         b += s;
 767         return true;
 768     } else {
 769         return false;
 770     }
 771 }
 772
 773 bool  matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
 774     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 775         return true;
 776     } else {
 777         return false;
 778     }
 779 }
 780
 781 bool  matchHead (const ustring& str, const char* t, size_t s) {
 782     if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
 783         return true;
 784     } else {
 785         return false;
 786     }
 787 }
 788
 789 bool  matchHead (const ustring& str, const ustring& head) {
 790     if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
 791         return true;
 792     } else {
 793         return false;
 794     }
 795 }
 796
 797 bool  match (uiterator b, uiterator e, const char* t, size_t s) {
 798     if (e - b == s && memcmp (t, &b[0], s) == 0) {
 799         return true;
 800     } else {
 801         return false;
 802     }
 803 }
 804
 805 bool  match (const ustring& str, const char* t, size_t s) {
 806     if (str.length () == s && memcmp (t, str.data (), s) == 0) {
 807         return true;
 808     } else {
 809         return false;
 810     }
 811 }
 812
 813 bool  match (uiterator b, uiterator e, const ustring& str) {
 814     if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
 815         return true;
 816     } else {
 817         return false;
 818     }
 819 }
 820
 821 bool  match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
 822     if (match (str, t, s) || match (str, t2, s2)) {
 823         return true;
 824     } else {
 825         return false;
 826     }
 827 }
 828
 829 ustring  clipWhite (uiterator b, uiterator e) {
 830     while (b < e)
 831         if (isblank (*b)) {
 832             b ++;
 833         } else {
 834             break;
 835         }
 836     while (b < e)
 837         if (isblank (*(e - 1))) {
 838             e --;
 839         } else {
 840             break;
 841         }
 842     return ustring (b, e);
 843 }
 844 ustring  clipWhite (const ustring& str) {
 845     return clipWhite (str.begin (), str.end ());
 846 }
 847
 848 ustring  getenvString (const char* key) {
 849     char*  e = getenv (key);
 850     if (e) {
 851         return ustring (e);
 852     } else {
 853         return uEmpty;
 854     }
 855 }
 856
 857 ustring  zeroPad (int n, const ustring& src) {
 858     int  m;
 859
 860     n = std::min (32, n);
 861     m = n - src.length ();
 862     if (m > 0) {
 863         ustring  ans;
 864         ans.reserve (m);
 865         ans.append (m, '0');
 866         ans.append (src);
 867         return ans;
 868     } else {
 869         return src;
 870     }
 871 }
 872
 873 ustring  padEmpty (const ustring& name) {
 874     if (name.empty ())
 875         return ustring (CharConst ("(null)"));
 876     else
 877         return name;
 878 }
 879
 880 uint32_t  hextoul (uiterator b, uiterator e) {
 881     uint32_t  ans = 0;
 882     int  n;
 883
 884     for (n = 0; n < 8 && b != e; n ++, b ++) {
 885         ans = (ans << 4) + hex (*b);
 886     }
 887     return ans;
 888 }
 889
 890 ustring  toCRLF (const ustring& str) {
 891     uiterator  b = str.begin ();
 892     uiterator  e = str.end ();
 893     uiterator  p;
 894     ustring  ans;
 895
 896     p = b;
 897     while (findChar (b, e, '\n')) {
 898         ans.append (p, b).append (uCRLF);
 899         p = ++ b;
 900     }
 901     if (p < e)
 902         ans.append (p, e);
 903     return ans;
 904 }
 905
 906 void  skipChar (uiterator& b, uiterator e, int ch) {
 907     while (b < e && *b == ch)
 908         ++ b;
 909 }
 910
 911 void  skipNextToChar (uiterator& b, uiterator e, int ch) {
 912     while (b < e) {
 913         if (*(b ++) == ch)
 914             return;
 915     }
 916 }
 917
 918 static ustring::value_type  toLower_ustring_value (ustring::value_type v) {
 919     if ('A' <= v && v <= 'Z') {
 920         return v - 'A' + 'a';
 921     } else {
 922         return v;
 923     }
 924 }
 925
 926 ustring  toLower (uiterator b, uiterator e) {
 927     ustring::iterator  i;
 928     ustring  ans;
 929     ans.resize (e - b);
 930     i = ans.begin ();
 931     for (; b < e; b ++, i++) {
 932         *i = toLower_ustring_value (*b);
 933     }
 934     return ans;
 935 }
 936
 937 static ustring  colpad0 (int n, const ustring& src) {
 938     int  m;
 939
 940     if (n > 0) {
 941         n = std::min (32, n);
 942         m = n - src.length ();
 943         if (m > 0) {
 944             ustring  ans;
 945             ans.reserve (n);
 946             ans.append (m, '0');
 947             ans.append (src);
 948             return ans;
 949         } else if (m == 0) {
 950             return src;
 951         } else {
 952             return ustring (src.end () - n, src.end ());
 953         }
 954     } else {
 955         return src;
 956     }
 957 }
 958
 959 /*
 960  ${Y:4}, ${Y:2}
 961  ${M:2}, ${M}, ${M:name}, ${M:ab}
 962  ${D:2}, ${D}
 963  ${h:2}, ${h}
 964  ${m:2}, ${m}
 965  ${s:2}, ${s}
 966  ${W}, ${w}
 967  ${o}
 968 */
 969 ustring  formatDateString (const ustring& format, struct tm& v) {
 970     ustring  ans;
 971     uiterator  b, e;
 972     umatch  m;
 973     int  pc;
 974     static uregex  re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}");
 975     std::vector<ustring>  fpar;
 976
 977     b = format.begin ();
 978     e = format.end ();
 979     while (usearch (b, e, m, re)) {
 980         ans.append (b, m[0].first);
 981         b = m[0].second;
 982         if (m[5].matched) {
 983             if (m[6].matched) { // name
 984                 ans.append (MStr[v.tm_mon]);
 985             } else if (m[7].matched || m[8].matched) { // abname
 986                 ans.append (MStr_a[v.tm_mon]);
 987             }
 988         } else {
 989             if (m[3].matched) {
 990                 pc = strtol (ustring (m[4].first, m[4].second));
 991             } else {
 992                 pc = 0;
 993             }
 994             switch (*m[2].first) {
 995             case 'Y':
 996                 ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900)));
 997                 break;
 998             case 'M':
 999                 ans.append (colpad0 (pc, to_ustring (v.tm_mon + 1)));
1000                 break;
1001             case 'D':
1002                 ans.append (colpad0 (pc, to_ustring (v.tm_mday)));
1003                 break;
1004             case 'h':
1005                 ans.append (colpad0 (pc, to_ustring (v.tm_hour)));
1006                 break;
1007             case 'm':
1008                 ans.append (colpad0 (pc, to_ustring (v.tm_min)));
1009                 break;
1010             case 's':
1011                 ans.append (colpad0 (pc, to_ustring (v.tm_sec)));
1012                 break;
1013             case 'W':
1014                 ans.append (WStr [v.tm_wday]);
1015                 break;
1016             case 'w':
1017                 ans.append (WStr_a [v.tm_wday]);
1018                 break;
1019             case 'o':
1020                 {
1021                     int  h, m;
1022                     if (v.tm_gmtoff < 0) {
1023                         h = - v.tm_gmtoff / 60;
1024                         m = h % 60;
1025                         h = h / 60;
1026                         ans.append (CharConst ("-")).append (colpad0 (4, to_ustring (h * 100 + m)));
1027                     } else {
1028                         h = v.tm_gmtoff / 60;
1029                         m = h % 60;
1030                         h = h / 60;
1031                         ans.append (CharConst ("+")).append (colpad0 (4, to_ustring (h * 100 + m)));
1032                     }
1033                 }
1034                 break;
1035             }
1036         }
1037     }
1038     ans.append (b, e);
1039
1040     return ans;
1041 }
1042
1043 ustring  toLower (const ustring& str) {
1044     return boost::to_lower_copy (str);
1045 }
1046
1047 ustring  toUpper (const ustring& str) {
1048     return boost::to_upper_copy (str);
1049 }
1050
1051 ustring  hexEncode (const ustring& data) {
1052     ustring  ans;
1053     uiterator  b, e;
1054
1055     ans.reserve (data.length () * 2);
1056     b = data.begin ();
1057     e = data.end ();
1058     for (; b < e; b ++) {
1059         ans.append (1, hexchar ((*b >> 4) & 0x0f));
1060         ans.append (1, hexchar (*b & 0x0f));
1061     }
1062     return ans;
1063 }
1064
1065 int  octchar (uiterator b) {    // 3bytes
1066     int  ans = 0;
1067     ans = *b - '0';
1068     ++ b;
1069     ans = ans * 8 + *b - '0';
1070     ++ b;
1071     ans = ans * 8 + *b - '0';
1072     return ans;
1073 }
1074
1075 ustring  octchar (int c) {
1076     ustring  ans (3, 0);
1077     ans[2] = (c & 0x7) + '0';
1078     c >>= 3;
1079     ans[1] = (c & 0x7) + '0';
1080     c >>= 3;
1081     ans[0] = (c & 0x3) + '0';
1082     return ans;
1083 }
1084
1085 bool  findNL (uiterator& b, uiterator e, uiterator& u) {
1086     for (; b < e; ++ b) {
1087         if (*b == '\n') {
1088             u = b + 1;
1089             return true;
1090         } else if (*b == '\r') {
1091             u = b + 1;
1092             if (u < e && *u == '\n')
1093                 ++ u;
1094             return true;
1095         }
1096     }
1097     u = e;
1098     return false;
1099 }
1100
1101 bool  findNLb (uiterator& b, uiterator e) {
1102     for (; b < e; ++ b) {
1103         if (*b == '\n') {
1104             ++ b;
1105             return true;
1106         } else if (*b == '\r') {
1107             ++ b;
1108             if (b < e && *b == '\n')
1109                 ++ b;
1110             return true;
1111         }
1112     }
1113     return false;
1114 }
1115
1116 bool  findChar (uiterator& b, uiterator e, int ch) {
1117     for (; b < e; ++ b) {
1118         if (*b == ch) {
1119             return true;
1120         }
1121     }
1122     return false;
1123 }
1124
1125 bool  findChars (uiterator& b, uiterator e, const ustring& pattern) {
1126     for (; b < e; ++ b) {
1127         if (pattern.find (*b) != ustring::npos) {
1128             return true;
1129         }
1130     }
1131     return false;
1132 }
1133
1134 bool  findCharFn (uiterator& b, uiterator e, bool (*fn)(int)) {
1135     for (; b < e; ++ b) {
1136         if (fn (*b))
1137             return true;
1138     }
1139     return false;
1140 }
1141
1142 bool  findSepColon (uiterator& b, uiterator e, uiterator& u) {
1143     // " *; *"を探索する。bは進む
1144     uiterator  p = b;
1145     if (findChar (b, e, ';')) {
1146         u = b + 1;
1147         while (p < b && *(b - 1) == ' ')
1148             -- b;
1149         while (u < e && *u == ' ')
1150             ++ u;
1151         return true;
1152     }
1153     u = e;
1154     return false;
1155 }
1156
1157 bool  matchHeadFn (uiterator& b, uiterator e, bool (*fn)(int)) {
1158     if (b < e && fn (*b)) {
1159         do {
1160             ++ b;
1161         } while (b < e && fn (*b));
1162         return true;
1163     }
1164     return false;
1165 }
1166
1167 bool  matchWordTbl (uiterator b, uiterator e, char* tbl) {
1168     int  c;
1169     if (b < e) {
1170         do {
1171             c = *b;
1172             if (0 <= c && c < 128 && tbl[c]) {  // 128〜はfalse
1173             } else {
1174                 return false;
1175             }
1176             ++ b;
1177         } while (b < e);
1178         return true;
1179     } else {
1180         return false;
1181     }
1182 }
1183
1184 bool  matchWordFn (uiterator b, uiterator e, bool (*fn)(int)) {
1185     int  c;
1186     if (b < e) {
1187         do {
1188             c = *b;
1189             if (0 <= c && c < 128 && fn (c)) {
1190             } else {
1191                 return false;
1192             }
1193             ++ b;
1194         } while (b < e);
1195         return true;
1196     } else {
1197         return false;
1198     }
1199 }