lib/util_string.cc

   1 #include "util_string.h"
   2 #include "util_const.h"
   3 #include "util_random.h"
   4 #include "util_splitter.h"
   5 #include "ml.h"
   6 #include "mlenv.h"
   7 #include "motorenv.h"
   8 #include "ustring.h"
   9 #include "utf8.h"
  10 #include "utf16.h"
  11 #include <boost/regex.hpp>
  12 #include <boost/regex/pattern_except.hpp>
  13 #include <boost/algorithm/string.hpp>
  14 #include <vector>
  15 #include <algorithm>
  16 #include <stdlib.h>
  17 #include <unistd.h>
  18 #include <string.h>
  19 #include <time.h>
  20 #include <float.h>
  21 #include <ctype.h>
  22
  23 UIConv::UIConv (const char* in, const char* out) {
  24     cd = iconv_open (in, out);
  25     if (cd == ICONV_ERR) {
  26         throw (ustring (in).append (CharConst (", ")).append (ustring (out)).append (CharConst (": unknown encoding.")));
  27     }
  28 }
  29
  30 ustring  UIConv::cv (const ustring& text) {
  31     ustring  ans;
  32
  33     if (cd != ICONV_ERR) {
  34         char*  buf = new char[4096];
  35         const char*  ibuf;
  36         char*  obuf;
  37         size_t  isize, osize, rsize;
  38
  39         ibuf = text.begin ().base ();
  40         isize = text.size ();
  41         while (isize > 0) {
  42             obuf = buf;
  43             osize = 4096;
  44 #ifdef Linux
  45             rsize = ::iconv (cd, (char**)&ibuf, &isize, &obuf, &osize);
  46 #else
  47             rsize = ::iconv (cd, &ibuf, &isize, &obuf, &osize);
  48 #endif
  49             if (rsize == -1) {
  50                 if (errno == EILSEQ) {
  51                     ibuf ++;
  52                     isize --;
  53                     ans.append (CharConst ("_"));
  54                 } else if (errno == EINVAL) {
  55                 } else if (errno == E2BIG) {
  56                 } else {
  57                     break;
  58                 }
  59             }
  60             if (obuf > buf)
  61                 ans.append (buf, obuf - buf);
  62         }
  63         delete buf;
  64     }
  65     return ans;
  66 }
  67
  68 ustring  c3 (const ustring& str) {
  69     bool  qsign = false;
  70     static uregex  re ("^[0-9]+");
  71     uiterator  b, e;
  72     umatch  m;
  73
  74     b = str.begin ();
  75     e = str.end ();
  76     if (str[0] == '-' || str[0] == '+') {
  77         qsign = true;
  78         b = b + 1;
  79     }
  80     if (usearch (b, e, m, re)) {
  81         int  n = m[0].second - m[0].first;
  82         int  l = str.size () + n / 3;
  83         ustring  ans;
  84
  85         ans.reserve (l);
  86         if (qsign) {
  87             ans.append (1, str[0]);
  88         }
  89         for (; b != m[0].second; b ++) {
  90             ans.append (1, *b);
  91             if (n > 1 && n % 3 == 1) {
  92                 ans.append (CharConst (","));
  93             }
  94             n --;
  95         }
  96         for (; b != e; b ++) {
  97             ans.append (1, *b);
  98         }
  99         return ans;
 100     } else {
 101         return str;
 102     }
 103 }
 104
 105 ustring  to_ustring (double val) {
 106     char  b[32];
 107     return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
 108 }
 109
 110 static int  hex (char c) {
 111     if ('0' <= c && c <= '9') {
 112         return (c - '0');
 113     } else if ('a' <= c && c <= 'f') {
 114         return (c -  'a' + 10);
 115     } else if ('A' <= c && c <= 'F') {
 116         return (c - 'A' + 10);
 117     } else {
 118         return 0;
 119     }
 120 }
 121
 122 static int  hex (char c1, char c2) {
 123     return (hex (c1) * 16 + hex (c2));
 124 }
 125
 126 static char  hexchar (int c) {
 127     if (0 <= c && c <= 9)
 128         return '0' + c;
 129     else if (10 <= c <= 15)
 130         return 'a' - 10 + c;
 131     else
 132         return '0';
 133 }
 134
 135 static char  hexchar_c (int c) {
 136     if (0 <= c && c <= 9)
 137         return '0' + c;
 138     else if (10 <= c <= 15)
 139         return 'A' - 10 + c;
 140     else
 141         return '0';
 142 }
 143
 144 static ustring  percentHex (int c) {
 145     ustring  ans (3, '%');
 146
 147     ans[1] = hexchar ((c >> 4) & 0x0f);
 148     ans[2] = hexchar (c & 0x0f);
 149     return ans;
 150 }
 151
 152 static ustring  percentHEX (int c) {
 153     ustring  ans (3, '%');
 154
 155     ans[1] = hexchar_c ((c >> 4) & 0x0f);
 156     ans[2] = hexchar_c (c & 0x0f);
 157     return ans;
 158 }
 159
 160 ustring  urldecode_nonul (const ustring& str) {
 161     ustring  ans;
 162     static uregex  re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
 163     umatch  m;
 164     uiterator  b, e;
 165
 166     ans.reserve (str.size ());
 167     b = str.begin ();
 168     e = str.end ();
 169     while (usearch (b, e, m, re)) {
 170         if (b != m[0].first) {
 171             ans.append (b, m[0].first);
 172         }
 173         if (m[1].matched) {
 174             ans.append (1, ' ');
 175         } else if (m[2].matched) {
 176             int  v = hex (*(m[2].first), *(m[2].first + 1));
 177             if (v != 0)
 178                 ans.append (1, v);
 179         } else {
 180         }
 181         b = m[0].second;
 182     }
 183     if (b != e) {
 184         ans.append (b, e);
 185     }
 186
 187     return ans;
 188 }
 189
 190 static ustring  omitPattern (const ustring& text, uregex& re) {
 191     Splitter  sp (text, re);
 192
 193     if (sp.next ()) {
 194         if (sp.match (0)) {
 195             ustring  ans;
 196             ans.reserve (text.length ());
 197             if (sp.begin () != sp.end ())
 198                 ans.append (sp.begin (), sp.end ());
 199             while (sp.next ()) {
 200                 if (sp.begin () != sp.end ())
 201                     ans.append (sp.begin (), sp.end ());
 202             }
 203             return ans;
 204         } else {
 205             return text;
 206         }
 207     } else {
 208         return text;
 209     }
 210 }
 211
 212 ustring  omitCtrl (const ustring& str) {
 213     static uregex  re ("[\\x00-\\x1f\\x7f]+");
 214     return omitPattern (str, re);
 215 }
 216
 217 ustring  omitCtrlX (const ustring& str) {
 218     static uregex  re ("[^\\x09\\x0a\\x20-\\x7e\\x80-\\xff]+");
 219     return omitPattern (str, re);
 220 }
 221
 222 ustring  omitNul (const ustring& str) {
 223     static uregex  re ("[\\x00]+");
 224     return omitPattern (str, re);
 225 }
 226
 227 ustring  omitNL (const ustring& str) {
 228     return omitPattern (str, re_nl);
 229 }
 230
 231 ustring  omitNonAscii (const ustring& str) {
 232     static uregex  re ("[^ -\\x7e]+");
 233     return omitPattern (str, re);
 234 }
 235
 236 ustring  omitNonAsciiWord (const ustring& str) {
 237     static uregex  re ("[^\\x21-\\x7e]+");
 238     return omitPattern (str, re);
 239 }
 240
 241 bool  to_bool (const ustring& v) {
 242     if (v.length () == 0 || (v.length () == 1 && v[0] == '0')) {
 243         return false;
 244     } else {
 245         return true;
 246     }
 247 }
 248
// Disabled (compiled out) earlier variant of percentEncode that works on a
// whole ustring and emits lower-case %hex escapes via percentHex.
#if 0
static ustring  percentEncode (const ustring& text, uregex& re) {
    /* $1 -> _
       $2 -> %HEX
    */
    umatch  m;
    uiterator  b, e;
    ustring  ans;

    b = text.begin ();
    e = text.end ();
    if (b != e && usearch (b, e, m, re)) {
        if (b != m[0].first) {
            ans.append (ustring (b, m[0].first));
        }
        if (m[1].matched) {
            ans.append (uUScore);
        } else if (m[2].matched) {
            ans.append (percentHex (*m[2].first));
        } else {
            assert (0);
        }
        b = m[0].second;
        while (b != e && usearch (b, e, m, re)) {
            if (b != m[0].first) {
                ans.append (ustring (b, m[0].first));
            }
            if (m[1].matched) {
                ans.append (uUScore);
            } else if (m[2].matched) {
                ans.append (percentHex (*m[2].first));
            } else {
                assert (0);
            }
            b = m[0].second;
        }
        if (b != e) {
            ans.append (ustring (b, e));
        }
        return ans;
    } else {
        return text;
    }
}
#endif
 294
 295 static ustring  percentEncode (uiterator b, uiterator e, const uregex& re) {
 296     // $1 -> _
 297     // $2 -> %HEX
 298     umatch  m;
 299     ustring  ans;
 300
 301     while (b < e && usearch (b, e, m, re)) {
 302         if (b < m[0].first)
 303             ans.append (b, m[0].first);
 304         if (m[1].matched) {
 305             ans.append (uUScore);
 306         } else if (m[2].matched) {
 307             ans.append (percentHEX (*m[2].first));
 308         } else {
 309             assert (0);
 310         }
 311         b = m[0].second;
 312     }
 313     if (b < e)
 314         ans.append (b, e);
 315
 316     return ans;
 317 }
 318
// Disabled (compiled out) urlencode; it relies on the disabled
// ustring-based percentEncode overload.
#if 0
ustring  urlencode (const ustring& url) {
    static uregex  re ("(\\x00)|([^a-zA-Z0-9_.,/\x80-\xff-])");

    return percentEncode (url, re);
}
#endif
 326
 327 ustring  percentEncode (uiterator b, uiterator e) {
 328     static uregex  re ("(\\x00)|([^A-Za-z0-9_.~-])");
 329
 330     return percentEncode (b, e, re);
 331 }
 332
 333 ustring  percentEncode (const ustring& str) {
 334     return percentEncode (str.begin (), str.end ());
 335 }
 336
 337 ustring  percentEncode_path (uiterator b, uiterator e) {
 338     uiterator  i;
 339     ustring  ans;
 340
 341     for (i = b; i < e; i ++) {
 342         if (*i == '/') {
 343             if (b < i)
 344                 ans.append (percentEncode (b, i));
 345             ans.append (CharConst ("/"));
 346             b = i + 1;
 347         }
 348     }
 349     if (b < e)
 350         ans.append (percentEncode (b, e));
 351
 352     return ans;
 353 }
 354
 355 ustring  percentEncode_path (const ustring& str) {
 356     return percentEncode_path (str.begin (), str.end ());
 357 }
 358
 359 ustring  percentDecode (const ustring& str) {
 360     ustring  ans;
 361     static uregex  re ("%([0-9a-fA-F][0-9a-fA-F])|\\x00");
 362     umatch  m;
 363     uiterator  b, e;
 364
 365     b = str.begin ();
 366     e = str.end ();
 367     while (usearch (b, e, m, re)) {
 368         if (b != m[0].first) {
 369             ans.append (b, m[0].first);
 370         }
 371         if (m[1].matched) {
 372             int  v = hex (*(m[1].first), *(m[1].first + 1));
 373             if (v != 0)
 374                 ans.append (1, v);
 375         } else {
 376         }
 377         b = m[0].second;
 378     }
 379     if (b != e) {
 380         ans.append (b, e);
 381     }
 382
 383     return fixUTF8 (ans);
 384 }
 385
 386 ustring  cookieencode (const ustring& text) {
 387     static uregex  re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
 388
 389     return percentEncode (text.begin (), text.end (), re);
 390 }
 391
 392 ustring  cookiedecode (const ustring& text) {
 393     umatch  m;
 394     uiterator  b, e;
 395     ustring  ans;
 396     int  a;
 397     static uregex  re ("%([0-9a-fA-F])([0-9a-fA-F])");
 398
 399     b = text.begin ();
 400     e = text.end ();
 401     while (usearch (b, e, m, re)) {
 402         if (b != m[0].first)
 403             ans.append (ustring (b, m[0].first));
 404         a = hex (*m[1].first, *m[2].first);
 405         ans.append (1, a);
 406         b = m[0].second;
 407     }
 408     if (b != e)
 409         ans.append (ustring (b, e));
 410
 411     return ans;
 412 }
 413
 414 ustring  clipColon (const ustring& text) {
 415     int  i;
 416     ustring  ans (text);
 417
 418     for (i = 0; i < ans.size (); i ++) {
 419         if (ans[i] == ':')
 420             ans[i] = '_';
 421     }
 422     return ans;
 423 }
 424
 425 ustring  dirPart (char* path) {
 426     char*  e = rindex (path, '/');
 427
 428     if (e && e != path) {
 429         return ustring (path, e - path);
 430     } else {
 431         return uSlash;
 432     }
 433 }
 434
 435 ustring  dirPart (const ustring& path) {
 436     ustring::size_type  s = path.rfind ('/', path.size ());
 437
 438     if (s == ustring::npos) {
 439         return uSlash;
 440     } else {
 441         return ustring (path.begin (), path.begin () + s);
 442     }
 443 }
 444
 445 ustring  filePart_osSafe (const ustring& path) {
 446     umatch  m;
 447     static uregex  re ("[^\\\\/]+$");
 448
 449     if (usearch (path, m, re)) {
 450         return ustring (m[0].first, m[0].second);
 451     } else {
 452         return uEmpty;
 453     }
 454 }
 455
 456 void  split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 457     Splitter  sp (b, e, re);
 458
 459     while (sp.next ()) {
 460         ans.push_back (sp.cur ());
 461     }
 462 }
 463
 464 void  splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 465     Splitter  sp (b, e, re);
 466
 467     if (b != e) {
 468         while (sp.nextSep ()) {
 469             ans.push_back (sp.cur ());
 470         }
 471         ans.push_back (ustring (sp.begin (), sp.eol ()));
 472     }
 473 }
 474
 475 bool  splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
 476     for (; b < e; b ++) {
 477         if (*b == ch) {
 478             m1 = b;
 479             return true;
 480         }
 481     }
 482     m1 = e;
 483     return false;
 484 }
 485
 486 ustring  escape_re (const ustring& text) {
 487     ustring::const_iterator  b, e;
 488     umatch  m;
 489     ustring  ans;
 490     int  c;
 491     char  buf[4];
 492     static uregex  re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
 493
 494     buf[0] = '\\';
 495     buf[1] = 'x';
 496     ans.reserve (text.size () + 16);
 497     b = text.begin ();
 498     e = text.end ();
 499     while (b != e && usearch (b, e, m, re)) {
 500         if (b != m[0].first)
 501             ans.append (b, m[0].first);
 502         c = *m[0].first;
 503         buf[2] = hexchar ((c >> 4) & 0x0f);
 504         buf[3] = hexchar (c & 0x0f);
 505         ans.append (buf, 4);
 506         b = m[0].second;
 507     }
 508     if (b != e)
 509         ans.append (b, e);
 510     return ans;
 511 }
 512
 513 ustring  slashEncode (const ustring& text) {
 514     ustring::const_iterator  b, e;
 515     umatch  m;
 516     ustring  ans;
 517     int  c;
 518     char  buf[4];
 519     static uregex  re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
 520
 521     buf[0] = '\\';
 522     buf[1] = 'x';
 523     b = text.begin ();
 524     e = text.end ();
 525     while (b != e && usearch (b, e, m, re)) {
 526         if (b != m[0].first)
 527             ans.append (b, m[0].first);
 528         if (m[1].matched) {
 529             c = *m[0].first;
 530             switch (c) {
 531             case '\t':
 532                 ans.append (CharConst ("\\t"));
 533                 break;
 534             case '\r':
 535                 ans.append (CharConst ("\\r"));
 536                 break;
 537             case '\n':
 538                 ans.append (CharConst ("\\n"));
 539                 break;
 540             default:
 541                 buf[2] = hexchar ((c >> 4) & 0x0f);
 542                 buf[3] = hexchar (c & 0x0f);
 543                 ans.append (buf, 4);
 544             }
 545         } else if (m[2].matched) {
 546             ans.append (CharConst ("\\\\"));
 547         } else if (m[3].matched) {
 548             ans.append (CharConst ("\\\""));
 549         } else {
 550             assert (0);
 551         }
 552         b = m[0].second;
 553     }
 554     if (b != e)
 555         ans.append (b, e);
 556     return ans;
 557 }
 558
 559 ustring  slashDecode (const ustring& text) {
 560     ustring::const_iterator  b, e;
 561     umatch  m;
 562     ustring  ans;
 563     int  c;
 564     static uregex  re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
 565
 566     b = text.begin ();
 567     e = text.end ();
 568     while (b != e && usearch (b, e, m, re)) {
 569         if (b != m[0].first)
 570             ans.append (b, m[0].first);
 571         b = m[0].first + 1;
 572         c = *b;
 573         switch (c) {
 574         case 't':
 575             ans.append (CharConst ("\t"));
 576             break;
 577         case 'r':
 578             ans.append (CharConst ("\r"));
 579             break;
 580         case 'n':
 581             ans.append (CharConst ("\n"));
 582             break;
 583         default:
 584             if (m[0].second - m[0].first == 4) {
 585                 c = (c - '0') * 64;
 586                 b ++;
 587                 c += (*b - '0') * 8;
 588                 b ++;
 589                 c += *b - '0';
 590                 if (0 < c && c < 0x20)
 591                     ans.append (1, c);
 592             } else {
 593                 ans.append (1, c);
 594             }
 595         }
 596         b = m[0].second;
 597     }
 598     if (b != e)
 599         ans.append (b, e);
 600     return ans;
 601 }
 602
 603 unsigned long  strtoul (const ustring& str) {
 604     return strtoul (str.c_str (), NULL, 10);
 605 }
 606
 607 unsigned long  strtoul (const uiterator& b) {
 608     return strtoul (&*b, NULL, 10);
 609 }
 610
 611 long  strtol (const ustring& str) {
 612     return strtol (str.c_str (), NULL, 10);
 613 }
 614
 615 double  strtod (const ustring& str) {
 616     return strtod (str.c_str (), NULL);
 617 }
 618
 619 bool  passMatch (const ustring& pass, const ustring& cpass) {
 620     if (pass.length () == 0 || cpass.length () == 0)
 621         return false;
 622     return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
 623 }
 624
 625 ustring  passCrypt (const ustring& pass) {
 626     ustring  salt = makeSalt ();
 627     return ustring (crypt (pass.c_str (), salt.c_str ()));
 628 }
 629
 630 size_t  strLength (const ustring& src) {
 631     uiterator  b, e;
 632     size_t  n = 0;
 633     b = src.begin ();
 634     e = src.end ();
 635     while (b < e) {
 636         n ++;
 637         nextChar (b, e);
 638     }
 639     return n;
 640 }
 641
 642 void  substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
 643     uiterator  b, e, t;
 644     size_t  i;
 645
 646     b = src.begin ();
 647     e = src.end ();
 648     for (i = 0; i < idx && b < e; i ++)
 649         nextChar (b, e);
 650     if (flen) {
 651         t = b;
 652         for (i = 0; i < len && t < e; i ++)
 653             nextChar (t, e);
 654         ans.assign (b, t);
 655     } else {
 656         ans.assign (b, e);
 657     }
 658 }
 659
 660 static bool  jssafe[] = {
 661     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 0--15
 662     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 16--31
 663     1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,            // 32--47
 664     1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,            // 48--63
 665     0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 64--79
 666     1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,            // 80--95
 667     0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 96--111
 668     1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,            // 112--127
 669 };
 670
 671 ustring  jsEncode (const ustring& str) {
 672     int  i;
 673     ustring  u, ans;
 674     int  c, d;
 675     char  b[8];
 676
 677     u = utf8to16 (str);
 678     ans.reserve (u.size () * 3);
 679     b[0] = '\\';
 680     b[1] = 'u';
 681     for (i = 0; i < u.size (); i += 2) {
 682         c = u[i];
 683         d = u[i + 1];
 684         if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
 685             ans.append (1, d);
 686         } else {
 687             b[2] = hexchar ((c >> 4) & 0x0f);
 688             b[3] = hexchar (c & 0x0f);
 689             b[4] = hexchar ((d >> 4) & 0x0f);
 690             b[5] = hexchar (d & 0x0f);
 691             ans.append (b, 6);
 692         }
 693     }
 694     return ans;
 695 }
 696
 697 ustring  filenameEncode (const ustring& text) {
 698     static uregex  re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
 699     Splitter  sp (text, re);
 700     ustring  ans;
 701     int  c;
 702
 703     if (text.length () == 0) {
 704         throw (ustring (text).append (uErrorBadName));
 705     }
 706     ans.reserve (text.length () + 16);
 707     while (sp.next ()) {
 708         if (sp.begin () < sp.end ())
 709             ans.append (sp.begin (), sp.end ());
 710         if (sp.match (1)) {
 711         } else if (sp.match (2)) {
 712             c = *sp.matchBegin (2);
 713             ans.append (1, ':');
 714             ans.append (1, hexchar ((c >> 4) & 0x0f));
 715             ans.append (1, hexchar (c & 0x0f));
 716         } else if (sp.match (3)) {
 717             for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
 718                 ans.append (CharConst (":2e"));
 719             }
 720         }
 721     }
 722     if (ans.length () > 250)
 723         ans.resize (250);
 724     return ans;
 725 }
 726
 727 ustring  filenameDecode (const ustring& text) {
 728     static uregex  re (":([0-9a-fA-F][0-9a-fA-F])");
 729     Splitter  sp (text, re);
 730     ustring  ans;
 731     int  c;
 732
 733     ans.reserve (text.length ());
 734     while (sp.next ()) {
 735         if (sp.begin () < sp.end ())
 736             ans.append (sp.begin (), sp.end ());
 737         if (sp.match (1)) {
 738             c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
 739             if (32 <= c && c < 256)
 740                 ans.append (1, c);
 741         }
 742     }
 743     return ans;
 744 }
 745
 746 bool  matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
 747     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 748         b += s;
 749         return true;
 750     } else {
 751         return false;
 752     }
 753 }
 754
 755 bool  matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
 756     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 757         return true;
 758     } else {
 759         return false;
 760     }
 761 }
 762
 763 bool  matchHead (const ustring& str, const char* t, size_t s) {
 764     if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
 765         return true;
 766     } else {
 767         return false;
 768     }
 769 }
 770
 771 bool  matchHead (const ustring& str, const ustring& head) {
 772     if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
 773         return true;
 774     } else {
 775         return false;
 776     }
 777 }
 778
 779 bool  match (uiterator b, uiterator e, const char* t, size_t s) {
 780     if (e - b == s && memcmp (t, &b[0], s) == 0) {
 781         return true;
 782     } else {
 783         return false;
 784     }
 785 }
 786
 787 bool  match (const ustring& str, const char* t, size_t s) {
 788     if (str.length () == s && memcmp (t, str.data (), s) == 0) {
 789         return true;
 790     } else {
 791         return false;
 792     }
 793 }
 794
 795 bool  match (uiterator b, uiterator e, const ustring& str) {
 796     if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
 797         return true;
 798     } else {
 799         return false;
 800     }
 801 }
 802
 803 bool  match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
 804     if (match (str, t, s) || match (str, t2, s2)) {
 805         return true;
 806     } else {
 807         return false;
 808     }
 809 }
 810
 811 ustring  clipWhite (uiterator b, uiterator e) {
 812     while (b < e)
 813         if (isblank (*b)) {
 814             b ++;
 815         } else {
 816             break;
 817         }
 818     while (b < e)
 819         if (isblank (*(e - 1))) {
 820             e --;
 821         } else {
 822             break;
 823         }
 824     return ustring (b, e);
 825 }
 826 ustring  clipWhite (const ustring& str) {
 827     return clipWhite (str.begin (), str.end ());
 828 }
 829
 830 ustring  getenvString (const char* key) {
 831     char*  e = getenv (key);
 832     if (e) {
 833         return ustring (e);
 834     } else {
 835         return uEmpty;
 836     }
 837 }
 838
 839 ustring  zeroPad (int n, const ustring& src) {
 840     int  m;
 841
 842     n = std::min (32, n);
 843     m = n - src.length ();
 844     if (m > 0) {
 845         ustring  ans;
 846         ans.reserve (m);
 847         ans.append (m, '0');
 848         ans.append (src);
 849         return ans;
 850     } else {
 851         return src;
 852     }
 853 }
 854
 855 bool  wsearch (const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
 856     try {
 857         std::wstring  wtext = utow (text);
 858         std::wstring  wreg = utow (reg);
 859         boost::wregex  wre (wreg, reg_flags);
 860         return regex_search (wtext, m, wre, search_flags);
 861     } catch (boost::regex_error& err) {
 862         throw (uErrorRegexp);
 863     }
 864 }
 865
 866 bool  wsearch_env (MlEnv* mlenv, const ustring& text, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
 867     try {
 868         mlenv->regtext = utow (text);
 869         std::wstring  wreg = utow (reg);
 870         boost::wregex  wre (wreg, reg_flags);
 871         return regex_search (mlenv->regtext, mlenv->regmatch, wre, search_flags);
 872     } catch (boost::regex_error& err) {
 873         throw (uErrorRegexp);
 874     }
 875 }
 876
 877 ustring  wreplace (const ustring& text, const ustring& reg, const ustring& fmt, boost::wregex::flag_type reg_flags, boost::match_flag_type match_flags) {
 878     try {
 879         std::wstring  wtext = utow (text);
 880         std::wstring  wreg = utow (reg);
 881         std::wstring  wfmt = utow (fmt);
 882         boost::wregex  wre (wreg, reg_flags);
 883         std::wstring  ans = regex_replace (wtext, wre, wfmt, match_flags);
 884         return wtou (ans);
 885     } catch (boost::regex_error& err) {
 886         throw (uErrorRegexp);
 887     }
 888 }
 889
 890 ustring  padEmpty (const ustring& name) {
 891     if (name.empty ())
 892         return ustring (CharConst ("(null)"));
 893     else
 894         return name;
 895 }
 896
 897 uint32_t  hextoul (uiterator b, uiterator e) {
 898     uint32_t  ans = 0;
 899     int  n;
 900
 901     for (n = 0; n < 8 && b != e; n ++, b ++) {
 902         ans = (ans << 4) + hex (*b);
 903     }
 904     return ans;
 905 }
 906
 907 ustring  toCRLF (const ustring& str) {
 908     uiterator  b = str.begin ();
 909     uiterator  e = str.end ();
 910     umatch  m;
 911     ustring  ans;
 912
 913     while (usearch (b, e, m, re_lf)) {
 914         ans.append (b, m[0].first).append (uCRLF);
 915         b = m[0].second;
 916     }
 917     ans.append (b, e);
 918     return ans;
 919 }
 920
 921 void  skipSpace (uiterator& b, uiterator e) {
 922     while (b < e && *b == ' ') {
 923         b ++;
 924     }
 925 }
 926
 927 static ustring::value_type  toLower_ustring_value (ustring::value_type v) {
 928     if ('A' <= v && v <= 'Z') {
 929         return v - 'A' + 'a';
 930     } else {
 931         return v;
 932     }
 933 }
 934
// Disabled (compiled out) in-place variant of toLower.
#if 0
void  toLower (ustring::iterator* b, ustring::iterator* e) {
    transform (*b, *e, *b, toLower_ustring_value);
}
#endif
 940
 941 ustring  toLower (uiterator b, uiterator e) {
 942     ustring::iterator  i;
 943     ustring  ans;
 944     ans.resize (e - b);
 945     i = ans.begin ();
 946     for (; b < e; b ++, i++) {
 947         *i = toLower_ustring_value (*b);
 948     }
 949     return ans;
 950 }
 951
 952 static void  format_hex (ustring& ans, MNode* a, std::vector<ustring>& par, bool fcap) {
 953     uint32_t  v = 0;
 954     char  buf[32];
 955
 956     if (a)
 957         v = to_int (a);
 958
 959     if (par.size () > 0) {
 960         int  p = strtol (par[0]);
 961         if (p < 0)
 962             p = 1;
 963         if (p > 20)
 964             p = 20;
 965         if (fcap)
 966             ans.append (buf, snprintf (buf, 32, "%.*X", p, v));
 967         else
 968             ans.append (buf, snprintf (buf, 32, "%.*x", p, v));
 969     } else {
 970         if (fcap)
 971             ans.append (buf, snprintf (buf, 32, "%X", v));
 972         else
 973             ans.append (buf, snprintf (buf, 32, "%x", v));
 974     }
 975 }
 976
 977 static void  format_hex (ustring& ans, MNode* a, std::vector<ustring>& par) {
 978     format_hex (ans, a, par, false);
 979 }
 980
 981 static void  format_HEX (ustring& ans, MNode* a, std::vector<ustring>& par) {
 982     format_hex (ans, a, par, true);
 983 }
 984
 985 static void  format_int_sub (ustring& ans, MNode* a, std::vector<ustring>& par, bool pad0 = false) {
 986     int32_t  v = 0;
 987     char  buf[32];
 988     size_t  s;
 989
 990     if (a)
 991         v = to_int (a);
 992
 993     if (par.size () > 0) {
 994         bool  fclip = false;
 995         bool  fzero = pad0;
 996         bool  fc3 = false;
 997         if (match (par[0], CharConst ("comma")) || match (par[0], CharConst ("c"))) {
 998             ans.append (c3 (to_ustring (v)));
 999         } else {
1000             int  p = strtol (par[0]);
1001             if (p < 0)
1002                 p = 1;
1003             if (p > 20)
1004                 p = 20;
1005             for (int i = 1; i < par.size (); i ++) {
1006                 if (match (par[i], CharConst ("clip"))) {
1007                     fclip = true;
1008                 } else if (match (par[i], CharConst ("0"))) {
1009                     fzero = true;
1010                 } else if (match (par[i], CharConst ("comma")) || match (par[i], CharConst ("c"))) {
1011                     fc3 = true;
1012                 } else {
1013                     throw (par[i] + uErrorBadParam);
1014                 }
1015             }
1016             if (fzero)
1017                 s = snprintf (buf, 32, "%.*d", p, v);
1018             else
1019                 s = snprintf (buf, 32, "%*d", p, v);
1020             if (fclip && s > p)
1021                 ans.append (buf + s - p, p);
1022             else if (! fclip && fc3)
1023                 ans.append (c3 (ustring (buf, s)));
1024             else
1025                 ans.append (buf, s);
1026         }
1027     } else {
1028         ans.append (to_ustring (v));
1029     }
1030 }
1031
1032 static void  format_int (ustring& ans, MNode* a, std::vector<ustring>& par) {
1033     format_int_sub (ans, a, par);
1034 }
1035
1036 static void  format_int0 (ustring& ans, MNode* a, std::vector<ustring>& par) {
1037     format_int_sub (ans, a, par, true);
1038 }
1039
1040 static void  format_int (ustring& ans, MNode* a, int c, bool pad0 = false) {
1041     int32_t  v = 0;
1042     char  buf[32];
1043     size_t  s;
1044
1045     if (a)
1046         v = to_int (a);
1047
1048     if (c > 0) {
1049         if (c > 20)
1050             c = 20;
1051         if (pad0)
1052             s = snprintf (buf, 32, "%.*d", c, v);
1053         else
1054             s = snprintf (buf, 32, "%*d", c, v);
1055         if (s > c)
1056             ans.append (buf + s - c, c);
1057         else
1058             ans.append (buf, s);
1059     } else {
1060         ans.append (to_ustring (v));
1061     }
1062 }
1063
1064 static void  format_float (ustring& ans, MNode* a, std::vector<ustring>& par) {
1065     int  p1 = 0;
1066     int  p2 = 0;
1067     char  buf[32];
1068
1069     if (par.size () > 0)
1070         p1 = strtol (par[0]);
1071     if (par.size () > 1)
1072         p2 = strtol (par[1]);
1073     if (p1 < 0)
1074         p1 = 0;
1075     if (p2 < 0)
1076         p2 = 0;
1077     if (p1 > 20)
1078         p1 = 20;
1079     if (p2 > 20)
1080         p2 = 20;
1081     ans.append (buf, snprintf (buf, 32, "%*.*lf", p1, p2, to_double (a)));
1082 }
1083
1084 static void  format_string (ustring& ans, MNode* a, std::vector<ustring>& par) {
1085     int  p = 0;
1086     bool  fright = false;
1087     ustring  u = to_string (a);
1088
1089     if (par.size () > 0)
1090         p = strtol (par[0]);
1091     if (p > 65536)
1092         p = 65536;
1093     if (par.size () > 1) {
1094         if (match (par[1], CharConst ("right")) || match (par[1], CharConst ("r")))
1095             fright = true;
1096         else
1097             throw (par[1] + uErrorBadParam);
1098     }
1099     if (fright) {
1100         if (u.size () < p)
1101             ans.append (p - u.size (), ' ').append (u);
1102         else
1103             ans.append (u);
1104     } else {
1105         if (u.size () < p)
1106             ans.append (u).append (p - u.size (), ' ');
1107         else
1108             ans.append (u);
1109     }
1110 }
1111
1112 static void  format_literal (ustring& ans, MNode* a, const char* list[], int offset, size_t size) {
1113     int  v;
1114
1115     if (a) {
1116         v = to_int (a) - offset;
1117         if (0 <= v && v < size)
1118             ans.append (list[v]);
1119     }
1120 }
1121
// Abbreviated English month names; index 0 is January.
static const char*  mstr_a[] = {
    "Jan", "Feb", "Mar",
    "Apr", "May", "Jun",
    "Jul", "Aug", "Sep",
    "Oct", "Nov", "Dec"
};
// Full English month names; index 0 is January.
static const char*  mstr[] = {
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December"
};
1132 static void  format_month (ustring& ans, MNode* a, std::vector<ustring>& par) {
1133     format_literal (ans, a, mstr_a, 1, 12);
1134 }
1135
1136 static void  format_Month (ustring& ans, MNode* a, std::vector<ustring>& par) {
1137     format_literal (ans, a, mstr, 1, 12);
1138 }
1139
// Abbreviated English weekday names; index 0 is Sunday.
static const char*  WStr_a[] = {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};

// Full English weekday names; index 0 is Sunday.
static const char*  WStr[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"
};
1149
1150 static void  format_week (ustring& ans, MNode* a, std::vector<ustring>& par) {
1151     format_literal (ans, a, WStr_a, 0, 7);
1152 }
1153
1154 static void  format_Week (ustring& ans, MNode* a, std::vector<ustring>& par) {
1155     format_literal (ans, a, WStr, 0, 7);
1156 }
1157
1158 ustring  formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
1159     ustring  ans;
1160     uiterator  b, e;
1161     umatch  m;
1162     u_int  i;
1163     MNode*  a;
1164     static uregex  re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
1165     static struct {
1166         const char* name;
1167         size_t  namelen;
1168         void  (*fn)(ustring& ans, MNode* a, std::vector<ustring>& par);
1169     }  formatFunc[] = {
1170         {CharConst ("hex"), format_hex},
1171         {CharConst ("HEX"), format_HEX},
1172         {CharConst ("int"), format_int},
1173         {CharConst ("int0"), format_int0},
1174         {CharConst ("float"), format_float},
1175         {CharConst ("string"), format_string},
1176         {CharConst ("month"), format_month},
1177         {CharConst ("Month"), format_Month},
1178         {CharConst ("week"), format_week},
1179         {CharConst ("Week"), format_Week},
1180         {NULL, 0, NULL}
1181     };
1182
1183     b = format.begin ();
1184     e = format.end ();
1185     while (usearch (b, e, m, re)) {
1186         ans.append (b, m[0].first);
1187         b = m[0].second;
1188         i = strtoul (ustring (m[1].first, m[1].second)) - 1;
1189         if (i < par.size ()) {
1190             a = par[i] ();
1191         } else {
1192             a = NULL;
1193         }
1194         if (! m[2].matched) {
1195             if (a)
1196                 ans.append (to_string (a));
1197         } else {
1198             std::vector<ustring>  fpar;
1199             int  i;
1200             if (m[4].matched)
1201                 split (m[5].first, m[5].second, re_colon, fpar);
1202             for (i = 0; formatFunc[i].name; i ++) {
1203                 if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) {
1204                     (*formatFunc[i].fn) (ans, a, fpar);
1205                     goto Bp1;
1206                 }
1207             }
1208             ans.append (m[0].first, m[0].second);
1209         Bp1:;
1210         }
1211     }
1212     ans.append (b, e);
1213
1214     return ans;
1215 }
1216
1217 static ustring  colpad0 (int n, const ustring& src) {
1218     int  m;
1219
1220     if (n > 0) {
1221         n = std::min (32, n);
1222         m = n - src.length ();
1223         if (m > 0) {
1224             ustring  ans;
1225             ans.reserve (n);
1226             ans.append (m, '0');
1227             ans.append (src);
1228             return ans;
1229         } else if (m == 0) {
1230             return src;
1231         } else {
1232             return ustring (src.end () - n, src.end ());
1233         }
1234     } else {
1235         return src;
1236     }
1237 }
1238
1239 /*
1240  ${Y:4}, ${Y:2}
1241  ${M:2}, ${M}, ${M:name}, ${M:ab}
1242  ${D:2}, ${D}
1243  ${h:2}, ${h}
1244  ${m:2}, ${m}
1245  ${s:2}, ${s}
1246  ${W}, ${w}
1247  ${o}
1248 */
1249 //ustring  formatDateString (const ustring& format, time_t tm) {
1250 ustring  formatDateString (const ustring& format, struct tm& v) {
1251     ustring  ans;
1252 //    struct tm  v;
1253     uiterator  b, e;
1254     umatch  m;
1255     int  pc;
1256 //    static uregex  re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
1257     static uregex  re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}");
1258     std::vector<ustring>  fpar;
1259
1260 //    localtime_r (&tm, &v);
1261     b = format.begin ();
1262     e = format.end ();
1263     while (usearch (b, e, m, re)) {
1264         ans.append (b, m[0].first);
1265         b = m[0].second;
1266         if (m[5].matched) {
1267             if (m[6].matched) { // name
1268                 ans.append (mstr[v.tm_mon]);
1269             } else if (m[7].matched || m[8].matched) { // abname
1270                 ans.append (mstr_a[v.tm_mon]);
1271             }
1272         } else {
1273 //          if (m[2].matched) {
1274             if (m[3].matched) {
1275 //              pc = strtol (ustring (m[3].first, m[3].second));
1276                 pc = strtol (ustring (m[4].first, m[4].second));
1277             } else {
1278                 pc = 0;
1279             }
1280 //          switch (*m[1].first) {
1281             switch (*m[2].first) {
1282             case 'Y':
1283                 ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900)));
1284                 break;
1285             case 'M':
1286                 ans.append (colpad0 (pc, to_ustring (v.tm_mon + 1)));
1287                 break;
1288             case 'D':
1289                 ans.append (colpad0 (pc, to_ustring (v.tm_mday)));
1290                 break;
1291             case 'h':
1292                 ans.append (colpad0 (pc, to_ustring (v.tm_hour)));
1293                 break;
1294             case 'm':
1295                 ans.append (colpad0 (pc, to_ustring (v.tm_min)));
1296                 break;
1297             case 's':
1298                 ans.append (colpad0 (pc, to_ustring (v.tm_sec)));
1299                 break;
1300             case 'W':
1301                 ans.append (WStr [v.tm_wday]);
1302                 break;
1303             case 'w':
1304                 ans.append (WStr_a [v.tm_wday]);
1305                 break;
1306             case 'o':
1307                 {
1308                     int  h, m;
1309                     if (v.tm_gmtoff < 0) {
1310                         h = - v.tm_gmtoff / 60;
1311                         m = h % 60;
1312                         h = h / 60;
1313                         ans.append (CharConst ("-")).append (colpad0 (4, to_ustring (h * 100 + m)));
1314                     } else {
1315                         h = v.tm_gmtoff / 60;
1316                         m = h % 60;
1317                         h = h / 60;
1318                         ans.append (CharConst ("+")).append (colpad0 (4, to_ustring (h * 100 + m)));
1319                     }
1320                 }
1321                 break;
1322             }
1323         }
1324     }
1325     ans.append (b, e);
1326
1327     return ans;
1328 }
1329
1330 ustring  toLower (const ustring& str) {
1331     return boost::to_lower_copy (str);
1332 }
1333
1334 ustring  toUpper (const ustring& str) {
1335     return boost::to_upper_copy (str);
1336 }
1337
1338 ustring  hexEncode (const ustring& data) {
1339     ustring  ans;
1340     uiterator  b, e;
1341
1342     ans.reserve (data.length () * 2);
1343     b = data.begin ();
1344     e = data.end ();
1345     for (; b < e; b ++) {
1346         ans.append (1, hexchar ((*b >> 4) & 0x0f));
1347         ans.append (1, hexchar (*b & 0x0f));
1348     }
1349     return ans;
1350 }