lib/util_string.cc

   1 #include "util_string.h"
   2 #include "util_const.h"
   3 #include "util_random.h"
   4 #include "util_splitter.h"
   5 #include "ml.h"
   6 #include "mlenv.h"
   7 #include "motorenv.h"
   8 #include "ustring.h"
   9 #include "utf8.h"
  10 #include "utf16.h"
  11 #include <boost/regex.hpp>
  12 #include <boost/regex/pattern_except.hpp>
  13 #include <boost/algorithm/string.hpp>
  14 #include <vector>
  15 #include <algorithm>
  16 #include <stdlib.h>
  17 #include <unistd.h>
  18 #include <string.h>
  19 #include <time.h>
  20 #include <float.h>
  21 #include <ctype.h>
  22
  23 UIConv::UIConv (const char* in, const char* out) {
  24     cd = iconv_open (in, out);
  25     if (cd == ICONV_ERR) {
  26         throw (ustring (in).append (CharConst (", ")).append (ustring (out)).append (CharConst (": unknown encoding.")));
  27     }
  28 }
  29
  30 ustring  UIConv::cv (const ustring& text) {
  31     ustring  ans;
  32
  33     if (cd != ICONV_ERR) {
  34         char*  buf = new char[4096];
  35         const char*  ibuf;
  36         char*  obuf;
  37         size_t  isize, osize, rsize;
  38
  39         ibuf = text.begin ().base ();
  40         isize = text.size ();
  41         while (isize > 0) {
  42             obuf = buf;
  43             osize = 4096;
  44 #ifdef Linux
  45             rsize = ::iconv (cd, (char**)&ibuf, &isize, &obuf, &osize);
  46 #else
  47             rsize = ::iconv (cd, &ibuf, &isize, &obuf, &osize);
  48 #endif
  49             if (rsize == -1) {
  50                 if (errno == EILSEQ) {
  51                     ibuf ++;
  52                     isize --;
  53                     ans.append (CharConst ("_"));
  54                 } else if (errno == EINVAL) {
  55                 } else if (errno == E2BIG) {
  56                 } else {
  57                     break;
  58                 }
  59             }
  60             if (obuf > buf)
  61                 ans.append (buf, obuf - buf);
  62         }
  63         delete buf;
  64     }
  65     return ans;
  66 }
  67
  68 ustring  c3 (const ustring& str) {
  69     bool  qsign = false;
  70     static uregex  re ("^[0-9]+");
  71     uiterator  b, e;
  72     umatch  m;
  73
  74     b = str.begin ();
  75     e = str.end ();
  76     if (str[0] == '-' || str[0] == '+') {
  77         qsign = true;
  78         b = b + 1;
  79     }
  80     if (usearch (b, e, m, re)) {
  81         int  n = m[0].second - m[0].first;
  82         int  l = str.size () + n / 3;
  83         ustring  ans;
  84
  85         ans.reserve (l);
  86         if (qsign) {
  87             ans.append (1, str[0]);
  88         }
  89         for (; b != m[0].second; b ++) {
  90             ans.append (1, *b);
  91             if (n > 1 && n % 3 == 1) {
  92                 ans.append (CharConst (","));
  93             }
  94             n --;
  95         }
  96         for (; b != e; b ++) {
  97             ans.append (1, *b);
  98         }
  99         return ans;
 100     } else {
 101         return str;
 102     }
 103 }
 104
 105 ustring  to_ustring (double val) {
 106     char  b[32];
 107     return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
 108 }
 109
 110 static int  hex (char c) {
 111     if ('0' <= c && c <= '9') {
 112         return (c - '0');
 113     } else if ('a' <= c && c <= 'f') {
 114         return (c -  'a' + 10);
 115     } else if ('A' <= c && c <= 'F') {
 116         return (c - 'A' + 10);
 117     } else {
 118         return 0;
 119     }
 120 }
 121
 122 static int  hex (char c1, char c2) {
 123     return (hex (c1) * 16 + hex (c2));
 124 }
 125
 126 static char  hexchar (int c) {
 127     if (0 <= c && c <= 9)
 128         return '0' + c;
 129     else if (10 <= c <= 15)
 130         return 'a' - 10 + c;
 131     else
 132         return '0';
 133 }
 134
 135 static char  hexchar_c (int c) {
 136     if (0 <= c && c <= 9)
 137         return '0' + c;
 138     else if (10 <= c <= 15)
 139         return 'A' - 10 + c;
 140     else
 141         return '0';
 142 }
 143
 144 static ustring  percentHex (int c) {
 145     ustring  ans (3, '%');
 146
 147     ans[1] = hexchar ((c >> 4) & 0x0f);
 148     ans[2] = hexchar (c & 0x0f);
 149     return ans;
 150 }
 151
 152 static ustring  percentHEX (int c) {
 153     ustring  ans (3, '%');
 154
 155     ans[1] = hexchar_c ((c >> 4) & 0x0f);
 156     ans[2] = hexchar_c (c & 0x0f);
 157     return ans;
 158 }
 159
 160 ustring  urldecode_nonul (const ustring& str) {
 161     ustring  ans;
 162     static uregex  re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
 163     umatch  m;
 164     uiterator  b, e;
 165
 166     ans.reserve (str.size ());
 167     b = str.begin ();
 168     e = str.end ();
 169     while (usearch (b, e, m, re)) {
 170         if (b != m[0].first) {
 171             ans.append (b, m[0].first);
 172         }
 173         if (m[1].matched) {
 174             ans.append (1, ' ');
 175         } else if (m[2].matched) {
 176             int  v = hex (*(m[2].first), *(m[2].first + 1));
 177             if (v != 0)
 178                 ans.append (1, v);
 179         } else {
 180         }
 181         b = m[0].second;
 182     }
 183     if (b != e) {
 184         ans.append (b, e);
 185     }
 186
 187     return ans;
 188 }
 189
 190 static ustring  omitPattern (const ustring& text, uregex& re) {
 191     Splitter  sp (text, re);
 192
 193     if (sp.next ()) {
 194         if (sp.match (0)) {
 195             ustring  ans;
 196             ans.reserve (text.length ());
 197             if (sp.begin () != sp.end ())
 198                 ans.append (sp.begin (), sp.end ());
 199             while (sp.next ()) {
 200                 if (sp.begin () != sp.end ())
 201                     ans.append (sp.begin (), sp.end ());
 202             }
 203             return ans;
 204         } else {
 205             return text;
 206         }
 207     } else {
 208         return text;
 209     }
 210 }
 211
 212 ustring  omitCtrl (const ustring& str) {
 213     static uregex  re ("[\\x00-\\x1f\\x7f]+");
 214     return omitPattern (str, re);
 215 }
 216
 217 ustring  omitNul (const ustring& str) {
 218     static uregex  re ("[\\x00]+");
 219     return omitPattern (str, re);
 220 }
 221
 222 ustring  omitNL (const ustring& str) {
 223     return omitPattern (str, re_nl);
 224 }
 225
 226 ustring  omitNonAscii (const ustring& str) {
 227     static uregex  re ("[^ -\\x7e]+");
 228     return omitPattern (str, re);
 229 }
 230
 231 ustring  omitNonAsciiWord (const ustring& str) {
 232     static uregex  re ("[^\\x21-\\x7e]+");
 233     return omitPattern (str, re);
 234 }
 235
 236 bool  to_bool (const ustring& v) {
 237     if (v.length () == 0 || (v.length () == 1 && v[0] == '0')) {
 238         return false;
 239     } else {
 240         return true;
 241     }
 242 }
 243
 244 #if 0
 245 static ustring  percentEncode (const ustring& text, uregex& re) {
 246     /* $1 -> _
 247        $2 -> %HEX
 248     */
 249     umatch  m;
 250     uiterator  b, e;
 251     ustring  ans;
 252
 253     b = text.begin ();
 254     e = text.end ();
 255     if (b != e && usearch (b, e, m, re)) {
 256         if (b != m[0].first) {
 257             ans.append (ustring (b, m[0].first));
 258         }
 259         if (m[1].matched) {
 260             ans.append (uUScore);
 261         } else if (m[2].matched) {
 262             ans.append (percentHex (*m[2].first));
 263         } else {
 264             assert (0);
 265         }
 266         b = m[0].second;
 267         while (b != e && usearch (b, e, m, re)) {
 268             if (b != m[0].first) {
 269                 ans.append (ustring (b, m[0].first));
 270             }
 271             if (m[1].matched) {
 272                 ans.append (uUScore);
 273             } else if (m[2].matched) {
 274                 ans.append (percentHex (*m[2].first));
 275             } else {
 276                 assert (0);
 277             }
 278             b = m[0].second;
 279         }
 280         if (b != e) {
 281             ans.append (ustring (b, e));
 282         }
 283         return ans;
 284     } else {
 285         return text;
 286     }
 287 }
 288 #endif
 289
 290 static ustring  percentEncode (uiterator b, uiterator e, const uregex& re) {
 291     // $1 -> _
 292     // $2 -> %HEX
 293     umatch  m;
 294     ustring  ans;
 295
 296     while (b < e && usearch (b, e, m, re)) {
 297         if (b < m[0].first)
 298             ans.append (b, m[0].first);
 299         if (m[1].matched) {
 300             ans.append (uUScore);
 301         } else if (m[2].matched) {
 302             ans.append (percentHEX (*m[2].first));
 303         } else {
 304             assert (0);
 305         }
 306         b = m[0].second;
 307     }
 308     if (b < e)
 309         ans.append (b, e);
 310
 311     return ans;
 312 }
 313
 314 #if 0
 315 ustring  urlencode (const ustring& url) {
 316     static uregex  re ("(\\x00)|([^a-zA-Z0-9_.,/\x80-\xff-])");
 317
 318     return percentEncode (url, re);
 319 }
 320 #endif
 321
 322 ustring  percentEncode (uiterator b, uiterator e) {
 323     static uregex  re ("(\\x00)|([^A-Za-z0-9_.~-])");
 324
 325     return percentEncode (b, e, re);
 326 }
 327
 328 ustring  percentEncode (const ustring& str) {
 329     return percentEncode (str.begin (), str.end ());
 330 }
 331
 332 ustring  percentEncode_path (uiterator b, uiterator e) {
 333     uiterator  i;
 334     ustring  ans;
 335
 336     for (i = b; i < e; i ++) {
 337         if (*i == '/') {
 338             if (b < i)
 339                 ans.append (percentEncode (b, i));
 340             ans.append (CharConst ("/"));
 341             b = i + 1;
 342         }
 343     }
 344     if (b < e)
 345         ans.append (percentEncode (b, e));
 346
 347     return ans;
 348 }
 349
 350 ustring  percentEncode_path (const ustring& str) {
 351     return percentEncode_path (str.begin (), str.end ());
 352 }
 353
 354 ustring  percentDecode (const ustring& str) {
 355     ustring  ans;
 356     static uregex  re ("%([0-9a-fA-F][0-9a-fA-F])|\\x00");
 357     umatch  m;
 358     uiterator  b, e;
 359
 360     b = str.begin ();
 361     e = str.end ();
 362     while (usearch (b, e, m, re)) {
 363         if (b != m[0].first) {
 364             ans.append (b, m[0].first);
 365         }
 366         if (m[1].matched) {
 367             int  v = hex (*(m[1].first), *(m[1].first + 1));
 368             if (v != 0)
 369                 ans.append (1, v);
 370         } else {
 371         }
 372         b = m[0].second;
 373     }
 374     if (b != e) {
 375         ans.append (b, e);
 376     }
 377
 378     return fixUTF8 (ans);
 379 }
 380
 381 ustring  cookieencode (const ustring& text) {
 382     static uregex  re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
 383
 384     return percentEncode (text.begin (), text.end (), re);
 385 }
 386
 387 ustring  cookiedecode (const ustring& text) {
 388     umatch  m;
 389     uiterator  b, e;
 390     ustring  ans;
 391     int  a;
 392     static uregex  re ("%([0-9a-fA-F])([0-9a-fA-F])");
 393
 394     b = text.begin ();
 395     e = text.end ();
 396     while (usearch (b, e, m, re)) {
 397         if (b != m[0].first)
 398             ans.append (ustring (b, m[0].first));
 399         a = hex (*m[1].first, *m[2].first);
 400         ans.append (1, a);
 401         b = m[0].second;
 402     }
 403     if (b != e)
 404         ans.append (ustring (b, e));
 405
 406     return ans;
 407 }
 408
 409 ustring  clipColon (const ustring& text) {
 410     int  i;
 411     ustring  ans (text);
 412
 413     for (i = 0; i < ans.size (); i ++) {
 414         if (ans[i] == ':')
 415             ans[i] = '_';
 416     }
 417     return ans;
 418 }
 419
 420 ustring  dirPart (char* path) {
 421     char*  e = rindex (path, '/');
 422
 423     if (e && e != path) {
 424         return ustring (path, e - path);
 425     } else {
 426         return uSlash;
 427     }
 428 }
 429
 430 ustring  dirPart (const ustring& path) {
 431     ustring::size_type  s = path.rfind ('/', path.size ());
 432
 433     if (s == ustring::npos) {
 434         return uSlash;
 435     } else {
 436         return ustring (path.begin (), path.begin () + s);
 437     }
 438 }
 439
 440 ustring  filePart_osSafe (const ustring& path) {
 441     umatch  m;
 442     static uregex  re ("[^\\\\/]+$");
 443
 444     if (usearch (path, m, re)) {
 445         return ustring (m[0].first, m[0].second);
 446     } else {
 447         return uEmpty;
 448     }
 449 }
 450
 451 void  split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 452     Splitter  sp (b, e, re);
 453
 454     while (sp.next ()) {
 455         ans.push_back (sp.cur ());
 456     }
 457 }
 458
 459 void  splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 460     Splitter  sp (b, e, re);
 461
 462     if (b != e) {
 463         while (sp.nextSep ()) {
 464             ans.push_back (sp.cur ());
 465         }
 466         ans.push_back (ustring (sp.begin (), sp.eol ()));
 467     }
 468 }
 469
 470 bool  splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
 471     for (; b < e; b ++) {
 472         if (*b == ch) {
 473             m1 = b;
 474             return true;
 475         }
 476     }
 477     m1 = e;
 478     return false;
 479 }
 480
 481 ustring  escape_re (const ustring& text) {
 482     ustring::const_iterator  b, e;
 483     umatch  m;
 484     ustring  ans;
 485     int  c;
 486     char  buf[4];
 487     static uregex  re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
 488
 489     buf[0] = '\\';
 490     buf[1] = 'x';
 491     ans.reserve (text.size () + 16);
 492     b = text.begin ();
 493     e = text.end ();
 494     while (b != e && usearch (b, e, m, re)) {
 495         if (b != m[0].first)
 496             ans.append (b, m[0].first);
 497         c = *m[0].first;
 498         buf[2] = hexchar ((c >> 4) & 0x0f);
 499         buf[3] = hexchar (c & 0x0f);
 500         ans.append (buf, 4);
 501         b = m[0].second;
 502     }
 503     if (b != e)
 504         ans.append (b, e);
 505     return ans;
 506 }
 507
 508 ustring  slashEncode (const ustring& text) {
 509     ustring::const_iterator  b, e;
 510     umatch  m;
 511     ustring  ans;
 512     int  c;
 513     char  buf[4];
 514     static uregex  re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
 515
 516     buf[0] = '\\';
 517     buf[1] = 'x';
 518     b = text.begin ();
 519     e = text.end ();
 520     while (b != e && usearch (b, e, m, re)) {
 521         if (b != m[0].first)
 522             ans.append (b, m[0].first);
 523         if (m[1].matched) {
 524             c = *m[0].first;
 525             switch (c) {
 526             case '\t':
 527                 ans.append (CharConst ("\\t"));
 528                 break;
 529             case '\r':
 530                 ans.append (CharConst ("\\r"));
 531                 break;
 532             case '\n':
 533                 ans.append (CharConst ("\\n"));
 534                 break;
 535             default:
 536                 buf[2] = hexchar ((c >> 4) & 0x0f);
 537                 buf[3] = hexchar (c & 0x0f);
 538                 ans.append (buf, 4);
 539             }
 540         } else if (m[2].matched) {
 541             ans.append (CharConst ("\\\\"));
 542         } else if (m[3].matched) {
 543             ans.append (CharConst ("\\\""));
 544         } else {
 545             assert (0);
 546         }
 547         b = m[0].second;
 548     }
 549     if (b != e)
 550         ans.append (b, e);
 551     return ans;
 552 }
 553
 554 ustring  slashDecode (const ustring& text) {
 555     ustring::const_iterator  b, e;
 556     umatch  m;
 557     ustring  ans;
 558     int  c;
 559     static uregex  re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
 560
 561     b = text.begin ();
 562     e = text.end ();
 563     while (b != e && usearch (b, e, m, re)) {
 564         if (b != m[0].first)
 565             ans.append (b, m[0].first);
 566         b = m[0].first + 1;
 567         c = *b;
 568         switch (c) {
 569         case 't':
 570             ans.append (CharConst ("\t"));
 571             break;
 572         case 'r':
 573             ans.append (CharConst ("\r"));
 574             break;
 575         case 'n':
 576             ans.append (CharConst ("\n"));
 577             break;
 578         default:
 579             if (m[0].second - m[0].first == 4) {
 580                 c = (c - '0') * 64;
 581                 b ++;
 582                 c += (*b - '0') * 8;
 583                 b ++;
 584                 c += *b - '0';
 585                 if (0 < c && c < 0x20)
 586                     ans.append (1, c);
 587             } else {
 588                 ans.append (1, c);
 589             }
 590         }
 591         b = m[0].second;
 592     }
 593     if (b != e)
 594         ans.append (b, e);
 595     return ans;
 596 }
 597
 598 unsigned long  strtoul (const ustring& str) {
 599     return strtoul (str.c_str (), NULL, 10);
 600 }
 601
 602 unsigned long  strtoul (const uiterator& b) {
 603     return strtoul (&*b, NULL, 10);
 604 }
 605
 606 long  strtol (const ustring& str) {
 607     return strtol (str.c_str (), NULL, 10);
 608 }
 609
 610 double  strtod (const ustring& str) {
 611     return strtod (str.c_str (), NULL);
 612 }
 613
 614 bool  passMatch (const ustring& pass, const ustring& cpass) {
 615     if (pass.length () == 0 || cpass.length () == 0)
 616         return false;
 617     return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
 618 }
 619
 620 ustring  passCrypt (const ustring& pass) {
 621     ustring  salt = makeSalt ();
 622     return ustring (crypt (pass.c_str (), salt.c_str ()));
 623 }
 624
 625 size_t  strLength (const ustring& src) {
 626     uiterator  b, e;
 627     size_t  n = 0;
 628     b = src.begin ();
 629     e = src.end ();
 630     while (b < e) {
 631         n ++;
 632         nextChar (b, e);
 633     }
 634     return n;
 635 }
 636
 637 void  substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
 638     uiterator  b, e, t;
 639     size_t  i;
 640
 641     b = src.begin ();
 642     e = src.end ();
 643     for (i = 0; i < idx && b < e; i ++)
 644         nextChar (b, e);
 645     if (flen) {
 646         t = b;
 647         for (i = 0; i < len && t < e; i ++)
 648             nextChar (t, e);
 649         ans.assign (b, t);
 650     } else {
 651         ans.assign (b, e);
 652     }
 653 }
 654
 655 static bool  jssafe[] = {
 656     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 0--15
 657     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 16--31
 658     1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,            // 32--47
 659     1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,            // 48--63
 660     0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 64--79
 661     1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,            // 80--95
 662     0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 96--111
 663     1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,            // 112--127
 664 };
 665
 666 ustring  jsEncode (const ustring& str) {
 667     int  i;
 668     ustring  u, ans;
 669     int  c, d;
 670     char  b[8];
 671
 672     u = utf8to16 (str);
 673     ans.reserve (u.size () * 3);
 674     b[0] = '\\';
 675     b[1] = 'u';
 676     for (i = 0; i < u.size (); i += 2) {
 677         c = u[i];
 678         d = u[i + 1];
 679         if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
 680             ans.append (1, d);
 681         } else {
 682             b[2] = hexchar ((c >> 4) & 0x0f);
 683             b[3] = hexchar (c & 0x0f);
 684             b[4] = hexchar ((d >> 4) & 0x0f);
 685             b[5] = hexchar (d & 0x0f);
 686             ans.append (b, 6);
 687         }
 688     }
 689     return ans;
 690 }
 691
 692 ustring  filenameEncode (const ustring& text) {
 693     static uregex  re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
 694     Splitter  sp (text, re);
 695     ustring  ans;
 696     int  c;
 697
 698     if (text.length () == 0) {
 699         throw (ustring (text).append (uErrorBadName));
 700     }
 701     ans.reserve (text.length () + 16);
 702     while (sp.next ()) {
 703         if (sp.begin () < sp.end ())
 704             ans.append (sp.begin (), sp.end ());
 705         if (sp.match (1)) {
 706         } else if (sp.match (2)) {
 707             c = *sp.matchBegin (2);
 708             ans.append (1, ':');
 709             ans.append (1, hexchar ((c >> 4) & 0x0f));
 710             ans.append (1, hexchar (c & 0x0f));
 711         } else if (sp.match (3)) {
 712             for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
 713                 ans.append (CharConst (":2e"));
 714             }
 715         }
 716     }
 717     if (ans.length () > 250)
 718         ans.resize (250);
 719     return ans;
 720 }
 721
 722 ustring  filenameDecode (const ustring& text) {
 723     static uregex  re (":([0-9a-fA-F][0-9a-fA-F])");
 724     Splitter  sp (text, re);
 725     ustring  ans;
 726     int  c;
 727
 728     ans.reserve (text.length ());
 729     while (sp.next ()) {
 730         if (sp.begin () < sp.end ())
 731             ans.append (sp.begin (), sp.end ());
 732         if (sp.match (1)) {
 733             c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
 734             if (32 <= c && c < 256)
 735                 ans.append (1, c);
 736         }
 737     }
 738     return ans;
 739 }
 740
 741 bool  matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
 742     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 743         b += s;
 744         return true;
 745     } else {
 746         return false;
 747     }
 748 }
 749
 750 bool  matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
 751     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 752         return true;
 753     } else {
 754         return false;
 755     }
 756 }
 757
 758 bool  matchHead (const ustring& str, const char* t, size_t s) {
 759     if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
 760         return true;
 761     } else {
 762         return false;
 763     }
 764 }
 765
 766 bool  matchHead (const ustring& str, const ustring& head) {
 767     if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
 768         return true;
 769     } else {
 770         return false;
 771     }
 772 }
 773
 774 bool  match (uiterator b, uiterator e, const char* t, size_t s) {
 775     if (e - b == s && memcmp (t, &b[0], s) == 0) {
 776         return true;
 777     } else {
 778         return false;
 779     }
 780 }
 781
 782 bool  match (const ustring& str, const char* t, size_t s) {
 783     if (str.length () == s && memcmp (t, str.data (), s) == 0) {
 784         return true;
 785     } else {
 786         return false;
 787     }
 788 }
 789
 790 bool  match (uiterator b, uiterator e, const ustring& str) {
 791     if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
 792         return true;
 793     } else {
 794         return false;
 795     }
 796 }
 797
 798 bool  match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
 799     if (match (str, t, s) || match (str, t2, s2)) {
 800         return true;
 801     } else {
 802         return false;
 803     }
 804 }
 805
 806 ustring  clipWhite (uiterator b, uiterator e) {
 807     while (b < e)
 808         if (isblank (*b)) {
 809             b ++;
 810         } else {
 811             break;
 812         }
 813     while (b < e)
 814         if (isblank (*(e - 1))) {
 815             e --;
 816         } else {
 817             break;
 818         }
 819     return ustring (b, e);
 820 }
 821 ustring  clipWhite (const ustring& str) {
 822     return clipWhite (str.begin (), str.end ());
 823 }
 824
 825 ustring  getenvString (const char* key) {
 826     char*  e = getenv (key);
 827     if (e) {
 828         return ustring (e);
 829     } else {
 830         return uEmpty;
 831     }
 832 }
 833
 834 ustring  zeroPad (int n, const ustring& src) {
 835     int  m;
 836
 837     n = std::min (32, n);
 838     m = n - src.length ();
 839     if (m > 0) {
 840         ustring  ans;
 841         ans.reserve (m);
 842         ans.append (m, '0');
 843         ans.append (src);
 844         return ans;
 845     } else {
 846         return src;
 847     }
 848 }
 849
 850 bool  wsearch (const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
 851     try {
 852         std::wstring  wtext = utow (text);
 853         std::wstring  wreg = utow (reg);
 854         boost::wregex  wre (wreg, reg_flags);
 855         return regex_search (wtext, m, wre, search_flags);
 856     } catch (boost::regex_error& err) {
 857         throw (uErrorRegexp);
 858     }
 859 }
 860
 861 bool  wsearch_env (MlEnv* mlenv, const ustring& text, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
 862     try {
 863         mlenv->regtext = utow (text);
 864         std::wstring  wreg = utow (reg);
 865         boost::wregex  wre (wreg, reg_flags);
 866         return regex_search (mlenv->regtext, mlenv->regmatch, wre, search_flags);
 867     } catch (boost::regex_error& err) {
 868         throw (uErrorRegexp);
 869     }
 870 }
 871
 872 ustring  wreplace (const ustring& text, const ustring& reg, const ustring& fmt, boost::wregex::flag_type reg_flags, boost::match_flag_type match_flags) {
 873     try {
 874         std::wstring  wtext = utow (text);
 875         std::wstring  wreg = utow (reg);
 876         std::wstring  wfmt = utow (fmt);
 877         boost::wregex  wre (wreg, reg_flags);
 878         std::wstring  ans = regex_replace (wtext, wre, wfmt, match_flags);
 879         return wtou (ans);
 880     } catch (boost::regex_error& err) {
 881         throw (uErrorRegexp);
 882     }
 883 }
 884
 885 ustring  padEmpty (const ustring& name) {
 886     if (name.empty ())
 887         return ustring (CharConst ("(null)"));
 888     else
 889         return name;
 890 }
 891
 892 uint32_t  hextoul (uiterator b, uiterator e) {
 893     uint32_t  ans = 0;
 894     int  n;
 895
 896     for (n = 0; n < 8 && b != e; n ++, b ++) {
 897         ans = (ans << 4) + hex (*b);
 898     }
 899     return ans;
 900 }
 901
 902 ustring  toCRLF (const ustring& str) {
 903     uiterator  b = str.begin ();
 904     uiterator  e = str.end ();
 905     umatch  m;
 906     ustring  ans;
 907
 908     while (usearch (b, e, m, re_lf)) {
 909         ans.append (b, m[0].first).append (uCRLF);
 910         b = m[0].second;
 911     }
 912     ans.append (b, e);
 913     return ans;
 914 }
 915
 916 void  skipSpace (uiterator& b, uiterator e) {
 917     while (b < e && *b == ' ') {
 918         b ++;
 919     }
 920 }
 921
 922 static ustring::value_type  toLower_ustring_value (ustring::value_type v) {
 923     if ('A' <= v && v <= 'Z') {
 924         return v - 'A' + 'a';
 925     } else {
 926         return v;
 927     }
 928 }
 929
 930 #if 0
 931 void  toLower (ustring::iterator* b, ustring::iterator* e) {
 932     transform (*b, *e, *b, toLower_ustring_value);
 933 }
 934 #endif
 935
 936 ustring  toLower (uiterator b, uiterator e) {
 937     ustring::iterator  i;
 938     ustring  ans;
 939     ans.resize (e - b);
 940     i = ans.begin ();
 941     for (; b < e; b ++, i++) {
 942         *i = toLower_ustring_value (*b);
 943     }
 944     return ans;
 945 }
 946
 947 static void  format_hex (ustring& ans, MNode* a, std::vector<ustring>& par, bool fcap) {
 948     uint32_t  v = 0;
 949     char  buf[32];
 950
 951     if (a)
 952         v = to_int (a);
 953
 954     if (par.size () > 0) {
 955         int  p = strtol (par[0]);
 956         if (p < 0)
 957             p = 1;
 958         if (p > 20)
 959             p = 20;
 960         if (fcap)
 961             ans.append (buf, snprintf (buf, 32, "%.*X", p, v));
 962         else
 963             ans.append (buf, snprintf (buf, 32, "%.*x", p, v));
 964     } else {
 965         if (fcap)
 966             ans.append (buf, snprintf (buf, 32, "%X", v));
 967         else
 968             ans.append (buf, snprintf (buf, 32, "%x", v));
 969     }
 970 }
 971
 972 static void  format_hex (ustring& ans, MNode* a, std::vector<ustring>& par) {
 973     format_hex (ans, a, par, false);
 974 }
 975
 976 static void  format_HEX (ustring& ans, MNode* a, std::vector<ustring>& par) {
 977     format_hex (ans, a, par, true);
 978 }
 979
 980 static void  format_int_sub (ustring& ans, MNode* a, std::vector<ustring>& par, bool pad0 = false) {
 981     int32_t  v = 0;
 982     char  buf[32];
 983     size_t  s;
 984
 985     if (a)
 986         v = to_int (a);
 987
 988     if (par.size () > 0) {
 989         bool  fclip = false;
 990         bool  fzero = pad0;
 991         bool  fc3 = false;
 992         if (match (par[0], CharConst ("comma")) || match (par[0], CharConst ("c"))) {
 993             ans.append (c3 (to_ustring (v)));
 994         } else {
 995             int  p = strtol (par[0]);
 996             if (p < 0)
 997                 p = 1;
 998             if (p > 20)
 999                 p = 20;
1000             for (int i = 1; i < par.size (); i ++) {
1001                 if (match (par[i], CharConst ("clip"))) {
1002                     fclip = true;
1003                 } else if (match (par[i], CharConst ("0"))) {
1004                     fzero = true;
1005                 } else if (match (par[i], CharConst ("comma")) || match (par[i], CharConst ("c"))) {
1006                     fc3 = true;
1007                 } else {
1008                     throw (par[i] + uErrorBadParam);
1009                 }
1010             }
1011             if (fzero)
1012                 s = snprintf (buf, 32, "%.*d", p, v);
1013             else
1014                 s = snprintf (buf, 32, "%*d", p, v);
1015             if (fclip && s > p)
1016                 ans.append (buf + s - p, p);
1017             else if (! fclip && fc3)
1018                 ans.append (c3 (ustring (buf, s)));
1019             else
1020                 ans.append (buf, s);
1021         }
1022     } else {
1023         ans.append (to_ustring (v));
1024     }
1025 }
1026
1027 static void  format_int (ustring& ans, MNode* a, std::vector<ustring>& par) {
1028     format_int_sub (ans, a, par);
1029 }
1030
1031 static void  format_int0 (ustring& ans, MNode* a, std::vector<ustring>& par) {
1032     format_int_sub (ans, a, par, true);
1033 }
1034
1035 static void  format_int (ustring& ans, MNode* a, int c, bool pad0 = false) {
1036     int32_t  v = 0;
1037     char  buf[32];
1038     size_t  s;
1039
1040     if (a)
1041         v = to_int (a);
1042
1043     if (c > 0) {
1044         if (c > 20)
1045             c = 20;
1046         if (pad0)
1047             s = snprintf (buf, 32, "%.*d", c, v);
1048         else
1049             s = snprintf (buf, 32, "%*d", c, v);
1050         if (s > c)
1051             ans.append (buf + s - c, c);
1052         else
1053             ans.append (buf, s);
1054     } else {
1055         ans.append (to_ustring (v));
1056     }
1057 }
1058
1059 static void  format_float (ustring& ans, MNode* a, std::vector<ustring>& par) {
1060     int  p1 = 0;
1061     int  p2 = 0;
1062     char  buf[32];
1063
1064     if (par.size () > 0)
1065         p1 = strtol (par[0]);
1066     if (par.size () > 1)
1067         p2 = strtol (par[1]);
1068     if (p1 < 0)
1069         p1 = 0;
1070     if (p2 < 0)
1071         p2 = 0;
1072     if (p1 > 20)
1073         p1 = 20;
1074     if (p2 > 20)
1075         p2 = 20;
1076     ans.append (buf, snprintf (buf, 32, "%*.*lf", p1, p2, to_double (a)));
1077 }
1078
1079 static void  format_string (ustring& ans, MNode* a, std::vector<ustring>& par) {
1080     int  p = 0;
1081     bool  fright = false;
1082     ustring  u = to_string (a);
1083
1084     if (par.size () > 0)
1085         p = strtol (par[0]);
1086     if (p > 65536)
1087         p = 65536;
1088     if (par.size () > 1) {
1089         if (match (par[1], CharConst ("right")) || match (par[1], CharConst ("r")))
1090             fright = true;
1091         else
1092             throw (par[1] + uErrorBadParam);
1093     }
1094     if (fright) {
1095         if (u.size () < p)
1096             ans.append (p - u.size (), ' ').append (u);
1097         else
1098             ans.append (u);
1099     } else {
1100         if (u.size () < p)
1101             ans.append (u).append (p - u.size (), ' ');
1102         else
1103             ans.append (u);
1104     }
1105 }
1106
1107 static void  format_literal (ustring& ans, MNode* a, const char* list[], int offset, size_t size) {
1108     int  v;
1109
1110     if (a) {
1111         v = to_int (a) - offset;
1112         if (0 <= v && v < size)
1113             ans.append (list[v]);
1114     }
1115 }
1116
// Abbreviated English month names; index 0 is "Jan".
// Used by format_month() (1-based value, offset 1) and indexed directly
// with tm_mon in formatDateString().
static const char*  mstr_a[] = {
    "Jan", "Feb", "Mar", "Apr",
    "May", "Jun", "Jul", "Aug",
    "Sep", "Oct", "Nov", "Dec"
};
// Full English month names; index 0 is "January".
// Used by format_Month() (1-based value, offset 1) and indexed directly
// with tm_mon in formatDateString().
static const char*  mstr[] = {
    "January", "February", "March", "April",
    "May", "June", "July", "August",
    "September", "October", "November", "December"
};
// Formatter registered under the name "month": appends the abbreviated
// month name for a value of 1..12 (1 = "Jan").  Other values, or a NULL
// node, append nothing.  `par` is not used.
static void  format_month (ustring& ans, MNode* a, std::vector<ustring>& par) {
    format_literal (ans, a, mstr_a, 1, 12);
}
1130
// Formatter registered under the name "Month": appends the full month
// name for a value of 1..12 (1 = "January").  Other values, or a NULL
// node, append nothing.  `par` is not used.
static void  format_Month (ustring& ans, MNode* a, std::vector<ustring>& par) {
    format_literal (ans, a, mstr, 1, 12);
}
1134
// Abbreviated English weekday names; index 0 is "Sun".
// Used by format_week() (0-based value) and indexed directly with
// tm_wday in formatDateString().
static const char*  WStr_a[] = {
    "Sun", "Mon", "Tue", "Wed",
    "Thu", "Fri", "Sat"
};
1139
// Full English weekday names; index 0 is "Sunday".
// Used by format_Week() (0-based value) and indexed directly with
// tm_wday in formatDateString().
static const char*  WStr[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
};
1144
// Formatter registered under the name "week": appends the abbreviated
// weekday name for a value of 0..6 (0 = "Sun").  Other values, or a NULL
// node, append nothing.  `par` is not used.
static void  format_week (ustring& ans, MNode* a, std::vector<ustring>& par) {
    format_literal (ans, a, WStr_a, 0, 7);
}
1148
// Formatter registered under the name "Week": appends the full weekday
// name for a value of 0..6 (0 = "Sunday").  Other values, or a NULL
// node, append nothing.  `par` is not used.
static void  format_Week (ustring& ans, MNode* a, std::vector<ustring>& par) {
    format_literal (ans, a, WStr, 0, 7);
}
1152
1153 ustring  formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
1154     ustring  ans;
1155     uiterator  b, e;
1156     umatch  m;
1157     u_int  i;
1158     MNode*  a;
1159     static uregex  re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
1160     static struct {
1161         const char* name;
1162         size_t  namelen;
1163         void  (*fn)(ustring& ans, MNode* a, std::vector<ustring>& par);
1164     }  formatFunc[] = {
1165         {CharConst ("hex"), format_hex},
1166         {CharConst ("HEX"), format_HEX},
1167         {CharConst ("int"), format_int},
1168         {CharConst ("int0"), format_int0},
1169         {CharConst ("float"), format_float},
1170         {CharConst ("string"), format_string},
1171         {CharConst ("month"), format_month},
1172         {CharConst ("Month"), format_Month},
1173         {CharConst ("week"), format_week},
1174         {CharConst ("Week"), format_Week},
1175         {NULL, 0, NULL}
1176     };
1177
1178     b = format.begin ();
1179     e = format.end ();
1180     while (usearch (b, e, m, re)) {
1181         ans.append (b, m[0].first);
1182         b = m[0].second;
1183         i = strtoul (ustring (m[1].first, m[1].second)) - 1;
1184         if (i < par.size ()) {
1185             a = par[i] ();
1186         } else {
1187             a = NULL;
1188         }
1189         if (! m[2].matched) {
1190             if (a)
1191                 ans.append (to_string (a));
1192         } else {
1193             std::vector<ustring>  fpar;
1194             int  i;
1195             if (m[4].matched)
1196                 split (m[5].first, m[5].second, re_colon, fpar);
1197             for (i = 0; formatFunc[i].name; i ++) {
1198                 if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) {
1199                     (*formatFunc[i].fn) (ans, a, fpar);
1200                     goto Bp1;
1201                 }
1202             }
1203             ans.append (m[0].first, m[0].second);
1204         Bp1:;
1205         }
1206     }
1207     ans.append (b, e);
1208
1209     return ans;
1210 }
1211
1212 static ustring  colpad0 (int n, const ustring& src) {
1213     int  m;
1214
1215     if (n > 0) {
1216         n = std::min (32, n);
1217         m = n - src.length ();
1218         if (m > 0) {
1219             ustring  ans;
1220             ans.reserve (n);
1221             ans.append (m, '0');
1222             ans.append (src);
1223             return ans;
1224         } else if (m == 0) {
1225             return src;
1226         } else {
1227             return ustring (src.end () - n, src.end ());
1228         }
1229     } else {
1230         return src;
1231     }
1232 }
1233
1234 /*
1235  ${Y:4}, ${Y:2}
1236  ${M:2}, ${M}, ${M:name}, ${M:ab}
1237  ${D:2}, ${D}
1238  ${h:2}, ${h}
1239  ${m:2}, ${m}
1240  ${s:2}, ${s}
1241  ${W}, ${w}
1242  ${o}
1243 */
1244 ustring  formatDateString (const ustring& format, time_t tm) {
1245     ustring  ans;
1246     struct tm  v;
1247     uiterator  b, e;
1248     umatch  m;
1249     int  pc;
1250 //    static uregex  re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
1251     static uregex  re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}");
1252     std::vector<ustring>  fpar;
1253
1254     localtime_r (&tm, &v);
1255     b = format.begin ();
1256     e = format.end ();
1257     while (usearch (b, e, m, re)) {
1258         ans.append (b, m[0].first);
1259         b = m[0].second;
1260         if (m[5].matched) {
1261             if (m[6].matched) { // name
1262                 ans.append (mstr[v.tm_mon]);
1263             } else if (m[7].matched || m[8].matched) { // abname
1264                 ans.append (mstr_a[v.tm_mon]);
1265             }
1266         } else {
1267 //          if (m[2].matched) {
1268             if (m[3].matched) {
1269 //              pc = strtol (ustring (m[3].first, m[3].second));
1270                 pc = strtol (ustring (m[4].first, m[4].second));
1271             } else {
1272                 pc = 0;
1273             }
1274 //          switch (*m[1].first) {
1275             switch (*m[2].first) {
1276             case 'Y':
1277                 ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900)));
1278                 break;
1279             case 'M':
1280                 ans.append (colpad0 (pc, to_ustring (v.tm_mon + 1)));
1281                 break;
1282             case 'D':
1283                 ans.append (colpad0 (pc, to_ustring (v.tm_mday)));
1284                 break;
1285             case 'h':
1286                 ans.append (colpad0 (pc, to_ustring (v.tm_hour)));
1287                 break;
1288             case 'm':
1289                 ans.append (colpad0 (pc, to_ustring (v.tm_min)));
1290                 break;
1291             case 's':
1292                 ans.append (colpad0 (pc, to_ustring (v.tm_sec)));
1293                 break;
1294             case 'W':
1295                 ans.append (WStr [v.tm_wday]);
1296                 break;
1297             case 'w':
1298                 ans.append (WStr_a [v.tm_wday]);
1299                 break;
1300             case 'o':
1301                 {
1302                     int  h, m;
1303                     if (v.tm_gmtoff < 0) {
1304                         h = - v.tm_gmtoff / 60;
1305                         m = h % 60;
1306                         h = h / 60;
1307                         ans.append (CharConst ("-")).append (colpad0 (4, to_ustring (h * 100 + m)));
1308                     } else {
1309                         h = v.tm_gmtoff / 60;
1310                         m = h % 60;
1311                         h = h / 60;
1312                         ans.append (CharConst ("+")).append (colpad0 (4, to_ustring (h * 100 + m)));
1313                     }
1314                 }
1315                 break;
1316             }
1317         }
1318     }
1319     ans.append (b, e);
1320
1321     return ans;
1322 }
1323
// Returns a lower-cased copy of `str` produced by boost::to_lower_copy();
// `str` itself is not modified.
ustring  toLower (const ustring& str) {
    return boost::to_lower_copy (str);
}
1327
// Returns an upper-cased copy of `str` produced by boost::to_upper_copy();
// `str` itself is not modified.
ustring  toUpper (const ustring& str) {
    return boost::to_upper_copy (str);
}