lib/util_string.cc

   1 #include "util_string.h"
   2 #include "util_const.h"
   3 #include "util_random.h"
   4 #include "util_splitter.h"
   5 #include "ml.h"
   6 #include "mlenv.h"
   7 #include "motorenv.h"
   8 #include "ustring.h"
   9 #include "utf8.h"
  10 #include "utf16.h"
  11 #include <boost/regex.hpp>
  12 #include <boost/regex/pattern_except.hpp>
  13 #include <boost/algorithm/string.hpp>
  14 #include <vector>
  15 #include <algorithm>
  16 #include <stdlib.h>
  17 #include <unistd.h>
  18 #include <string.h>
  19 #include <time.h>
  20 #include <float.h>
  21 #include <ctype.h>
  22
  23 UIConv::UIConv (const char* in, const char* out) {
  24     cd = iconv_open (in, out);
  25     if (cd == ICONV_ERR) {
  26         throw (ustring (in).append (CharConst (", ")).append (ustring (out)).append (CharConst (": unknown encoding.")));
  27     }
  28 }
  29
  30 ustring  UIConv::cv (const ustring& text) {
  31     ustring  ans;
  32
  33     if (cd != ICONV_ERR) {
  34         char*  buf = new char[4096];
  35         const char*  ibuf;
  36         char*  obuf;
  37         size_t  isize, osize, rsize;
  38
  39         ibuf = text.begin ().base ();
  40         isize = text.size ();
  41         while (isize > 0) {
  42             obuf = buf;
  43             osize = 4096;
  44 #ifdef Linux
  45             rsize = ::iconv (cd, (char**)&ibuf, &isize, &obuf, &osize);
  46 #else
  47             rsize = ::iconv (cd, &ibuf, &isize, &obuf, &osize);
  48 #endif
  49             if (rsize == -1) {
  50                 if (errno == EILSEQ) {
  51                     ibuf ++;
  52                     isize --;
  53                     ans.append (CharConst ("_"));
  54                 } else if (errno == EINVAL) {
  55                 } else if (errno == E2BIG) {
  56                 } else {
  57                     break;
  58                 }
  59             }
  60             if (obuf > buf)
  61                 ans.append (buf, obuf - buf);
  62         }
  63         delete buf;
  64     }
  65     return ans;
  66 }
  67
  68 ustring  c3 (const ustring& str) {
  69     bool  qsign = false;
  70     static uregex  re ("^[0-9]+");
  71     uiterator  b, e;
  72     umatch  m;
  73
  74     b = str.begin ();
  75     e = str.end ();
  76     if (str[0] == '-' || str[0] == '+') {
  77         qsign = true;
  78         b = b + 1;
  79     }
  80     if (usearch (b, e, m, re)) {
  81         int  n = m[0].second - m[0].first;
  82         int  l = str.size () + n / 3;
  83         ustring  ans;
  84
  85         ans.reserve (l);
  86         if (qsign) {
  87             ans.append (1, str[0]);
  88         }
  89         for (; b != m[0].second; b ++) {
  90             ans.append (1, *b);
  91             if (n > 1 && n % 3 == 1) {
  92                 ans.append (CharConst (","));
  93             }
  94             n --;
  95         }
  96         for (; b != e; b ++) {
  97             ans.append (1, *b);
  98         }
  99         return ans;
 100     } else {
 101         return str;
 102     }
 103 }
 104
 105 ustring  to_ustring (double val) {
 106     char  b[32];
 107     return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
 108 }
 109
 110 static int  hex (char c) {
 111     if ('0' <= c && c <= '9') {
 112         return (c - '0');
 113     } else if ('a' <= c && c <= 'f') {
 114         return (c -  'a' + 10);
 115     } else if ('A' <= c && c <= 'F') {
 116         return (c - 'A' + 10);
 117     } else {
 118         return 0;
 119     }
 120 }
 121
 122 static int  hex (char c1, char c2) {
 123     return (hex (c1) * 16 + hex (c2));
 124 }
 125
 126 static char  hexchar (int c) {
 127     if (0 <= c && c <= 9)
 128         return '0' + c;
 129     else if (10 <= c <= 15)
 130         return 'a' - 10 + c;
 131     else
 132         return '0';
 133 }
 134
 135 static char  hexchar_c (int c) {
 136     if (0 <= c && c <= 9)
 137         return '0' + c;
 138     else if (10 <= c <= 15)
 139         return 'A' - 10 + c;
 140     else
 141         return '0';
 142 }
 143
 144 static ustring  percentHex (int c) {
 145     ustring  ans (3, '%');
 146
 147     ans[1] = hexchar ((c >> 4) & 0x0f);
 148     ans[2] = hexchar (c & 0x0f);
 149     return ans;
 150 }
 151
 152 ustring  percentHEX (int c) {
 153     ustring  ans (3, '%');
 154
 155     ans[1] = hexchar_c ((c >> 4) & 0x0f);
 156     ans[2] = hexchar_c (c & 0x0f);
 157     return ans;
 158 }
 159
 160 ustring  urldecode_nonul (const ustring& str) {
 161     ustring  ans;
 162     static uregex  re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
 163     umatch  m;
 164     uiterator  b, e;
 165
 166     ans.reserve (str.size ());
 167     b = str.begin ();
 168     e = str.end ();
 169     while (usearch (b, e, m, re)) {
 170         if (b != m[0].first) {
 171             ans.append (b, m[0].first);
 172         }
 173         if (m[1].matched) {
 174             ans.append (1, ' ');
 175         } else if (m[2].matched) {
 176             int  v = hex (*(m[2].first), *(m[2].first + 1));
 177             if (v != 0)
 178                 ans.append (1, v);
 179         } else {
 180         }
 181         b = m[0].second;
 182     }
 183     if (b != e) {
 184         ans.append (b, e);
 185     }
 186
 187     return ans;
 188 }
 189
 190 ustring  omitPattern (const ustring& text, uregex& re) {
 191     Splitter  sp (text, re);
 192
 193     if (sp.next ()) {
 194         if (sp.match (0)) {
 195             ustring  ans;
 196             ans.reserve (text.length ());
 197             if (sp.begin () != sp.end ())
 198                 ans.append (sp.begin (), sp.end ());
 199             while (sp.next ()) {
 200                 if (sp.begin () != sp.end ())
 201                     ans.append (sp.begin (), sp.end ());
 202             }
 203             return ans;
 204         } else {
 205             return text;
 206         }
 207     } else {
 208         return text;
 209     }
 210 }
 211
 212 ustring  omitCtrl (const ustring& str) {
 213     static uregex  re ("[\\x00-\\x1f\\x7f]+");
 214     return omitPattern (str, re);
 215 }
 216
 217 ustring  omitCtrlX (const ustring& str) {
 218     static uregex  re ("[^\\x09\\x0a\\x20-\\x7e\\x80-\\xff]+");
 219     return omitPattern (str, re);
 220 }
 221
 222 ustring  omitNul (const ustring& str) {
 223     static uregex  re ("[\\x00]+");
 224     return omitPattern (str, re);
 225 }
 226
 227 ustring  omitNL (const ustring& str) {
 228     return omitPattern (str, re_nl);
 229 }
 230
 231 ustring  omitNonAscii (const ustring& str) {
 232     static uregex  re ("[^ -\\x7e]+");
 233     return omitPattern (str, re);
 234 }
 235
 236 ustring  omitNonAsciiWord (const ustring& str) {
 237     static uregex  re ("[^\\x21-\\x7e]+");
 238     return omitPattern (str, re);
 239 }
 240
 241 static ustring  percentEncode (uiterator b, uiterator e, const uregex& re) {
 242     // $1 -> _
 243     // $2 -> %HEX
 244     umatch  m;
 245     ustring  ans;
 246
 247     while (b < e && usearch (b, e, m, re)) {
 248         if (b < m[0].first)
 249             ans.append (b, m[0].first);
 250         if (m[1].matched) {
 251             ans.append (uUScore);
 252         } else if (m[2].matched) {
 253             ans.append (percentHEX (*m[2].first));
 254         } else {
 255             assert (0);
 256         }
 257         b = m[0].second;
 258     }
 259     if (b < e)
 260         ans.append (b, e);
 261
 262     return ans;
 263 }
 264
 265 ustring  percentEncode (uiterator b, uiterator e) {
 266     static uregex  re ("(\\x00)|([^A-Za-z0-9_.~-])");
 267
 268     return percentEncode (b, e, re);
 269 }
 270
 271 ustring  percentEncode_path (uiterator b, uiterator e) {
 272     static uregex  re ("(\\x00)|([^A-Za-z0-9_/.~-])");
 273
 274     return percentEncode (b, e, re);
 275 }
 276
 277 ustring  percentEncode (const ustring& str) {
 278     return percentEncode (str.begin (), str.end ());
 279 }
 280
 281 ustring  percentEncode_path (const ustring& str) {
 282     return percentEncode_path (str.begin (), str.end ());
 283 }
 284
 285 #if 0
 286 ustring  percentEncode_path (uiterator b, uiterator e) {
 287     uiterator  i;
 288     ustring  ans;
 289
 290     for (i = b; i < e; i ++) {
 291         if (*i == '/') {
 292             if (b < i)
 293                 ans.append (percentEncode (b, i));
 294             ans.append (uSlash);
 295             b = i + 1;
 296         }
 297     }
 298     if (b < e)
 299         ans.append (percentEncode (b, e));
 300
 301     return ans;
 302 }
 303
 304 ustring  percentEncode_path (const ustring& str) {
 305     return percentEncode_path (str.begin (), str.end ());
 306 }
 307 #endif
 308
 309 ustring  percentDecode (const ustring& str) {
 310     ustring  ans;
 311     static uregex  re ("%([0-9a-fA-F][0-9a-fA-F])|\\x00");
 312     umatch  m;
 313     uiterator  b, e;
 314
 315     b = str.begin ();
 316     e = str.end ();
 317     while (usearch (b, e, m, re)) {
 318         if (b != m[0].first) {
 319             ans.append (b, m[0].first);
 320         }
 321         if (m[1].matched) {
 322             int  v = hex (*(m[1].first), *(m[1].first + 1));
 323             if (v != 0)
 324                 ans.append (1, v);
 325         } else {
 326         }
 327         b = m[0].second;
 328     }
 329     if (b != e) {
 330         ans.append (b, e);
 331     }
 332
 333     return fixUTF8 (ans);
 334 }
 335
 336 ustring  cookieencode (const ustring& text) {
 337     static uregex  re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
 338
 339     return percentEncode (text.begin (), text.end (), re);
 340 }
 341
 342 ustring  cookiedecode (const ustring& text) {
 343     umatch  m;
 344     uiterator  b, e;
 345     ustring  ans;
 346     int  a;
 347     static uregex  re ("%([0-9a-fA-F])([0-9a-fA-F])");
 348
 349     b = text.begin ();
 350     e = text.end ();
 351     while (usearch (b, e, m, re)) {
 352         if (b != m[0].first)
 353             ans.append (ustring (b, m[0].first));
 354         a = hex (*m[1].first, *m[2].first);
 355         ans.append (1, a);
 356         b = m[0].second;
 357     }
 358     if (b != e)
 359         ans.append (ustring (b, e));
 360
 361     return ans;
 362 }
 363
 364 ustring  clipColon (const ustring& text) {
 365     int  i;
 366     ustring  ans (text);
 367
 368     for (i = 0; i < ans.size (); i ++) {
 369         if (ans[i] == ':')
 370             ans[i] = '_';
 371     }
 372     return ans;
 373 }
 374
 375 ustring  dirPart (const ustring& path) {
 376     ustring::size_type  s = path.rfind ('/', path.size ());
 377
 378     if (s == ustring::npos) {
 379 //      return uSlash;
 380         return uDot;
 381     } else {
 382         return ustring (path.begin (), path.begin () + s);
 383     }
 384 }
 385
 386 ustring  filePart_osSafe (const ustring& path) {
 387     umatch  m;
 388     static uregex  re ("[^\\\\/]+$");
 389
 390     if (usearch (path, m, re)) {
 391         return ustring (m[0].first, m[0].second);
 392     } else {
 393         return uEmpty;
 394     }
 395 }
 396
 397 void  split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 398     Splitter  sp (b, e, re);
 399
 400     while (sp.next ()) {
 401         ans.push_back (sp.cur ());
 402     }
 403 }
 404
 405 void  splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 406     Splitter  sp (b, e, re);
 407
 408     if (b != e) {
 409         while (sp.nextSep ()) {
 410             ans.push_back (sp.cur ());
 411         }
 412         ans.push_back (ustring (sp.begin (), sp.eol ()));
 413     }
 414 }
 415
 416 bool  splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
 417     for (; b < e; b ++) {
 418         if (*b == ch) {
 419             m1 = b;
 420             return true;
 421         }
 422     }
 423     m1 = e;
 424     return false;
 425 }
 426
 427 ustring  escape_re (const ustring& text) {
 428     ustring::const_iterator  b, e;
 429     umatch  m;
 430     ustring  ans;
 431     int  c;
 432     char  buf[4];
 433     static uregex  re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
 434
 435     buf[0] = '\\';
 436     buf[1] = 'x';
 437     ans.reserve (text.size () + 16);
 438     b = text.begin ();
 439     e = text.end ();
 440     while (b != e && usearch (b, e, m, re)) {
 441         if (b != m[0].first)
 442             ans.append (b, m[0].first);
 443         c = *m[0].first;
 444         buf[2] = hexchar ((c >> 4) & 0x0f);
 445         buf[3] = hexchar (c & 0x0f);
 446         ans.append (buf, 4);
 447         b = m[0].second;
 448     }
 449     if (b != e)
 450         ans.append (b, e);
 451     return ans;
 452 }
 453
 454 ustring  slashEncode (const ustring& text) {
 455     ustring::const_iterator  b, e;
 456     umatch  m;
 457     ustring  ans;
 458     int  c;
 459     char  buf[4];
 460     static uregex  re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
 461
 462     buf[0] = '\\';
 463     buf[1] = 'x';
 464     b = text.begin ();
 465     e = text.end ();
 466     while (b != e && usearch (b, e, m, re)) {
 467         if (b != m[0].first)
 468             ans.append (b, m[0].first);
 469         if (m[1].matched) {
 470             c = *m[0].first;
 471             switch (c) {
 472             case '\t':
 473                 ans.append (CharConst ("\\t"));
 474                 break;
 475             case '\r':
 476                 ans.append (CharConst ("\\r"));
 477                 break;
 478             case '\n':
 479                 ans.append (CharConst ("\\n"));
 480                 break;
 481             default:
 482                 buf[2] = hexchar ((c >> 4) & 0x0f);
 483                 buf[3] = hexchar (c & 0x0f);
 484                 ans.append (buf, 4);
 485             }
 486         } else if (m[2].matched) {
 487             ans.append (CharConst ("\\\\"));
 488         } else if (m[3].matched) {
 489             ans.append (CharConst ("\\\""));
 490         } else {
 491             assert (0);
 492         }
 493         b = m[0].second;
 494     }
 495     if (b != e)
 496         ans.append (b, e);
 497     return ans;
 498 }
 499
 500 ustring  slashDecode (const ustring& text) {
 501     ustring::const_iterator  b, e;
 502     umatch  m;
 503     ustring  ans;
 504     int  c;
 505     static uregex  re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
 506
 507     b = text.begin ();
 508     e = text.end ();
 509     while (b != e && usearch (b, e, m, re)) {
 510         if (b != m[0].first)
 511             ans.append (b, m[0].first);
 512         b = m[0].first + 1;
 513         c = *b;
 514         switch (c) {
 515         case 't':
 516             ans.append (CharConst ("\t"));
 517             break;
 518         case 'r':
 519             ans.append (CharConst ("\r"));
 520             break;
 521         case 'n':
 522             ans.append (CharConst ("\n"));
 523             break;
 524         default:
 525             if (m[0].second - m[0].first == 4) {
 526                 c = (c - '0') * 64;
 527                 b ++;
 528                 c += (*b - '0') * 8;
 529                 b ++;
 530                 c += *b - '0';
 531                 if (0 < c && c < 0x20)
 532                     ans.append (1, c);
 533             } else {
 534                 ans.append (1, c);
 535             }
 536         }
 537         b = m[0].second;
 538     }
 539     if (b != e)
 540         ans.append (b, e);
 541     return ans;
 542 }
 543
 544 unsigned long  strtoul (const ustring& str) {
 545     return strtoul (str.c_str (), NULL, 10);
 546 }
 547
 548 unsigned long  strtoul (const uiterator& b) {
 549     return strtoul (&*b, NULL, 10);
 550 }
 551
 552 long  strtol (const ustring& str) {
 553     return strtol (str.c_str (), NULL, 10);
 554 }
 555
 556 double  strtod (const ustring& str) {
 557     return strtod (str.c_str (), NULL);
 558 }
 559
 560 bool  passMatch (const ustring& pass, const ustring& cpass) {
 561     if (pass.length () == 0 || cpass.length () == 0)
 562         return false;
 563     return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
 564 }
 565
 566 ustring  passCrypt (const ustring& pass) {
 567     ustring  salt = makeSalt ();
 568     return ustring (crypt (pass.c_str (), salt.c_str ()));
 569 }
 570
 571 size_t  strLength (const ustring& src) {
 572     uiterator  b, e;
 573     size_t  n = 0;
 574     b = src.begin ();
 575     e = src.end ();
 576     while (b < e) {
 577         n ++;
 578         nextChar (b, e);
 579     }
 580     return n;
 581 }
 582
 583 void  substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
 584     uiterator  b, e, t;
 585     size_t  i;
 586
 587     b = src.begin ();
 588     e = src.end ();
 589     for (i = 0; i < idx && b < e; i ++)
 590         nextChar (b, e);
 591     if (flen) {
 592         t = b;
 593         for (i = 0; i < len && t < e; i ++)
 594             nextChar (t, e);
 595         ans.assign (b, t);
 596     } else {
 597         ans.assign (b, e);
 598     }
 599 }
 600
 601 static bool  jssafe[] = {
 602     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 0--15
 603     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 16--31
 604     1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,            // 32--47
 605     1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,            // 48--63
 606     0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 64--79
 607     1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,            // 80--95
 608     0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 96--111
 609     1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,            // 112--127
 610 };
 611
 612 ustring  jsEncode (const ustring& str) {
 613     int  i;
 614     ustring  u, ans;
 615     int  c, d;
 616     char  b[8];
 617
 618     u = utf8to16 (str);
 619     ans.reserve (u.size () * 3);
 620     b[0] = '\\';
 621     b[1] = 'u';
 622     for (i = 0; i < u.size (); i += 2) {
 623         c = u[i];
 624         d = u[i + 1];
 625         if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
 626             ans.append (1, d);
 627         } else {
 628             b[2] = hexchar ((c >> 4) & 0x0f);
 629             b[3] = hexchar (c & 0x0f);
 630             b[4] = hexchar ((d >> 4) & 0x0f);
 631             b[5] = hexchar (d & 0x0f);
 632             ans.append (b, 6);
 633         }
 634     }
 635     return ans;
 636 }
 637
 638 ustring  filenameEncode (const ustring& text) {
 639     static uregex  re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
 640     Splitter  sp (text, re);
 641     ustring  ans;
 642     int  c;
 643
 644     if (text.length () == 0) {
 645         throw (ustring (text).append (uErrorBadName));
 646     }
 647     ans.reserve (text.length () + 16);
 648     while (sp.next ()) {
 649         if (sp.begin () < sp.end ())
 650             ans.append (sp.begin (), sp.end ());
 651         if (sp.match (1)) {
 652         } else if (sp.match (2)) {
 653             c = *sp.matchBegin (2);
 654             ans.append (1, ':');
 655             ans.append (1, hexchar ((c >> 4) & 0x0f));
 656             ans.append (1, hexchar (c & 0x0f));
 657         } else if (sp.match (3)) {
 658             for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
 659                 ans.append (CharConst (":2e"));
 660             }
 661         }
 662     }
 663     if (ans.length () > 250)
 664         ans.resize (250);
 665     return ans;
 666 }
 667
 668 ustring  filenameDecode (const ustring& text) {
 669     static uregex  re (":([0-9a-fA-F][0-9a-fA-F])");
 670     Splitter  sp (text, re);
 671     ustring  ans;
 672     int  c;
 673
 674     ans.reserve (text.length ());
 675     while (sp.next ()) {
 676         if (sp.begin () < sp.end ())
 677             ans.append (sp.begin (), sp.end ());
 678         if (sp.match (1)) {
 679             c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
 680             if (32 <= c && c < 256)
 681                 ans.append (1, c);
 682         }
 683     }
 684     return ans;
 685 }
 686
 687 bool  matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
 688     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 689         b += s;
 690         return true;
 691     } else {
 692         return false;
 693     }
 694 }
 695
 696 bool  matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
 697     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 698         return true;
 699     } else {
 700         return false;
 701     }
 702 }
 703
 704 bool  matchHead (const ustring& str, const char* t, size_t s) {
 705     if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
 706         return true;
 707     } else {
 708         return false;
 709     }
 710 }
 711
 712 bool  matchHead (const ustring& str, const ustring& head) {
 713     if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
 714         return true;
 715     } else {
 716         return false;
 717     }
 718 }
 719
 720 bool  match (uiterator b, uiterator e, const char* t, size_t s) {
 721     if (e - b == s && memcmp (t, &b[0], s) == 0) {
 722         return true;
 723     } else {
 724         return false;
 725     }
 726 }
 727
 728 bool  match (const ustring& str, const char* t, size_t s) {
 729     if (str.length () == s && memcmp (t, str.data (), s) == 0) {
 730         return true;
 731     } else {
 732         return false;
 733     }
 734 }
 735
 736 bool  match (uiterator b, uiterator e, const ustring& str) {
 737     if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
 738         return true;
 739     } else {
 740         return false;
 741     }
 742 }
 743
 744 bool  match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
 745     if (match (str, t, s) || match (str, t2, s2)) {
 746         return true;
 747     } else {
 748         return false;
 749     }
 750 }
 751
 752 ustring  clipWhite (uiterator b, uiterator e) {
 753     while (b < e)
 754         if (isblank (*b)) {
 755             b ++;
 756         } else {
 757             break;
 758         }
 759     while (b < e)
 760         if (isblank (*(e - 1))) {
 761             e --;
 762         } else {
 763             break;
 764         }
 765     return ustring (b, e);
 766 }
 767 ustring  clipWhite (const ustring& str) {
 768     return clipWhite (str.begin (), str.end ());
 769 }
 770
 771 ustring  getenvString (const char* key) {
 772     char*  e = getenv (key);
 773     if (e) {
 774         return ustring (e);
 775     } else {
 776         return uEmpty;
 777     }
 778 }
 779
 780 ustring  zeroPad (int n, const ustring& src) {
 781     int  m;
 782
 783     n = std::min (32, n);
 784     m = n - src.length ();
 785     if (m > 0) {
 786         ustring  ans;
 787         ans.reserve (m);
 788         ans.append (m, '0');
 789         ans.append (src);
 790         return ans;
 791     } else {
 792         return src;
 793     }
 794 }
 795
 796 bool  wsearch (const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
 797     try {
 798         std::wstring  wtext = utow (text);
 799         std::wstring  wreg = utow (reg);
 800         boost::wregex  wre (wreg, reg_flags);
 801         return regex_search (wtext, m, wre, search_flags);
 802     } catch (boost::regex_error& err) {
 803         throw (uErrorRegexp);
 804     }
 805 }
 806
 807 bool  wsearch_env (MlEnv* mlenv, const ustring& text, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
 808     try {
 809         mlenv->regtext = utow (text);
 810         std::wstring  wreg = utow (reg);
 811         boost::wregex  wre (wreg, reg_flags);
 812         return regex_search (mlenv->regtext, mlenv->regmatch, wre, search_flags);
 813     } catch (boost::regex_error& err) {
 814         throw (uErrorRegexp);
 815     }
 816 }
 817
 818 ustring  wreplace (const ustring& text, const ustring& reg, const ustring& fmt, boost::wregex::flag_type reg_flags, boost::match_flag_type match_flags) {
 819     try {
 820         std::wstring  wtext = utow (text);
 821         std::wstring  wreg = utow (reg);
 822         std::wstring  wfmt = utow (fmt);
 823         boost::wregex  wre (wreg, reg_flags);
 824         std::wstring  ans = regex_replace (wtext, wre, wfmt, match_flags);
 825         return wtou (ans);
 826     } catch (boost::regex_error& err) {
 827         throw (uErrorRegexp);
 828     }
 829 }
 830
 831 ustring  padEmpty (const ustring& name) {
 832     if (name.empty ())
 833         return ustring (CharConst ("(null)"));
 834     else
 835         return name;
 836 }
 837
 838 uint32_t  hextoul (uiterator b, uiterator e) {
 839     uint32_t  ans = 0;
 840     int  n;
 841
 842     for (n = 0; n < 8 && b != e; n ++, b ++) {
 843         ans = (ans << 4) + hex (*b);
 844     }
 845     return ans;
 846 }
 847
 848 ustring  toCRLF (const ustring& str) {
 849     uiterator  b = str.begin ();
 850     uiterator  e = str.end ();
 851     umatch  m;
 852     ustring  ans;
 853
 854     while (usearch (b, e, m, re_lf)) {
 855         ans.append (b, m[0].first).append (uCRLF);
 856         b = m[0].second;
 857     }
 858     ans.append (b, e);
 859     return ans;
 860 }
 861
 862 void  skipSpace (uiterator& b, uiterator e) {
 863     while (b < e && *b == ' ') {
 864         b ++;
 865     }
 866 }
 867
 868 static ustring::value_type  toLower_ustring_value (ustring::value_type v) {
 869     if ('A' <= v && v <= 'Z') {
 870         return v - 'A' + 'a';
 871     } else {
 872         return v;
 873     }
 874 }
 875
 876 ustring  toLower (uiterator b, uiterator e) {
 877     ustring::iterator  i;
 878     ustring  ans;
 879     ans.resize (e - b);
 880     i = ans.begin ();
 881     for (; b < e; b ++, i++) {
 882         *i = toLower_ustring_value (*b);
 883     }
 884     return ans;
 885 }
 886
 887 static void  format_hex (ustring& ans, MNode* a, std::vector<ustring>& par, bool fcap) {
 888     uint32_t  v = 0;
 889     char  buf[32];
 890
 891     if (a)
 892         v = to_int (a);
 893
 894     if (par.size () > 0) {
 895         int  p = strtol (par[0]);
 896         if (p < 0)
 897             p = 1;
 898         if (p > 20)
 899             p = 20;
 900         if (fcap)
 901             ans.append (buf, snprintf (buf, 32, "%.*X", p, v));
 902         else
 903             ans.append (buf, snprintf (buf, 32, "%.*x", p, v));
 904     } else {
 905         if (fcap)
 906             ans.append (buf, snprintf (buf, 32, "%X", v));
 907         else
 908             ans.append (buf, snprintf (buf, 32, "%x", v));
 909     }
 910 }
 911
 912 static void  format_hex (ustring& ans, MNode* a, std::vector<ustring>& par) {
 913     format_hex (ans, a, par, false);
 914 }
 915
 916 static void  format_HEX (ustring& ans, MNode* a, std::vector<ustring>& par) {
 917     format_hex (ans, a, par, true);
 918 }
 919
 920 static void  format_int_sub (ustring& ans, MNode* a, std::vector<ustring>& par, bool pad0 = false) {
 921     int32_t  v = 0;
 922     char  buf[32];
 923     size_t  s;
 924
 925     if (a)
 926         v = to_int (a);
 927
 928     if (par.size () > 0) {
 929         bool  fclip = false;
 930         bool  fzero = pad0;
 931         bool  fc3 = false;
 932         if (match (par[0], CharConst ("comma")) || match (par[0], CharConst ("c"))) {
 933             ans.append (c3 (to_ustring (v)));
 934         } else {
 935             int  p = strtol (par[0]);
 936             if (p < 0)
 937                 p = 1;
 938             if (p > 20)
 939                 p = 20;
 940             for (int i = 1; i < par.size (); i ++) {
 941                 if (match (par[i], CharConst ("clip"))) {
 942                     fclip = true;
 943                 } else if (match (par[i], CharConst ("0"))) {
 944                     fzero = true;
 945                 } else if (match (par[i], CharConst ("comma")) || match (par[i], CharConst ("c"))) {
 946                     fc3 = true;
 947                 } else {
 948                     throw (par[i] + uErrorBadParam);
 949                 }
 950             }
 951             if (fzero)
 952                 s = snprintf (buf, 32, "%.*d", p, v);
 953             else
 954                 s = snprintf (buf, 32, "%*d", p, v);
 955             if (fclip && s > p)
 956                 ans.append (buf + s - p, p);
 957             else if (! fclip && fc3)
 958                 ans.append (c3 (ustring (buf, s)));
 959             else
 960                 ans.append (buf, s);
 961         }
 962     } else {
 963         ans.append (to_ustring (v));
 964     }
 965 }
 966
 967 static void  format_int (ustring& ans, MNode* a, std::vector<ustring>& par) {
 968     format_int_sub (ans, a, par);
 969 }
 970
 971 static void  format_int0 (ustring& ans, MNode* a, std::vector<ustring>& par) {
 972     format_int_sub (ans, a, par, true);
 973 }
 974
 975 static void  format_int (ustring& ans, MNode* a, int c, bool pad0 = false) {
 976     int32_t  v = 0;
 977     char  buf[32];
 978     size_t  s;
 979
 980     if (a)
 981         v = to_int (a);
 982
 983     if (c > 0) {
 984         if (c > 20)
 985             c = 20;
 986         if (pad0)
 987             s = snprintf (buf, 32, "%.*d", c, v);
 988         else
 989             s = snprintf (buf, 32, "%*d", c, v);
 990         if (s > c)
 991             ans.append (buf + s - c, c);
 992         else
 993             ans.append (buf, s);
 994     } else {
 995         ans.append (to_ustring (v));
 996     }
 997 }
 998
 999 static void  format_float (ustring& ans, MNode* a, std::vector<ustring>& par) {
1000     int  p1 = 0;
1001     int  p2 = 0;
1002     char  buf[32];
1003
1004     if (par.size () > 0)
1005         p1 = strtol (par[0]);
1006     if (par.size () > 1)
1007         p2 = strtol (par[1]);
1008     if (p1 < 0)
1009         p1 = 0;
1010     if (p2 < 0)
1011         p2 = 0;
1012     if (p1 > 20)
1013         p1 = 20;
1014     if (p2 > 20)
1015         p2 = 20;
1016     ans.append (buf, snprintf (buf, 32, "%*.*lf", p1, p2, to_double (a)));
1017 }
1018
1019 static void  format_string (ustring& ans, MNode* a, std::vector<ustring>& par) {
1020     int  p = 0;
1021     bool  fright = false;
1022     ustring  u = to_string (a);
1023
1024     if (par.size () > 0)
1025         p = strtol (par[0]);
1026     if (p > 65536)
1027         p = 65536;
1028     if (par.size () > 1) {
1029         if (match (par[1], CharConst ("right")) || match (par[1], CharConst ("r")))
1030             fright = true;
1031         else
1032             throw (par[1] + uErrorBadParam);
1033     }
1034     if (fright) {
1035         if (u.size () < p)
1036             ans.append (p - u.size (), ' ').append (u);
1037         else
1038             ans.append (u);
1039     } else {
1040         if (u.size () < p)
1041             ans.append (u).append (p - u.size (), ' ');
1042         else
1043             ans.append (u);
1044     }
1045 }
1046
1047 static void  format_literal (ustring& ans, MNode* a, const char* list[], int offset, size_t size) {
1048     int  v;
1049
1050     if (a) {
1051         v = to_int (a) - offset;
1052         if (0 <= v && v < size)
1053             ans.append (list[v]);
1054     }
1055 }
1056
// Abbreviated English month names; index 0 is January.
static const char*  mstr_a[] = {
    "Jan", "Feb", "Mar",
    "Apr", "May", "Jun",
    "Jul", "Aug", "Sep",
    "Oct", "Nov", "Dec"
};
// Full English month names; index 0 is January.
static const char*  mstr[] = {
    "January", "February", "March",
    "April", "May", "June",
    "July", "August", "September",
    "October", "November", "December"
};
1067 static void  format_month (ustring& ans, MNode* a, std::vector<ustring>& par) {
1068     format_literal (ans, a, mstr_a, 1, 12);
1069 }
1070
1071 static void  format_Month (ustring& ans, MNode* a, std::vector<ustring>& par) {
1072     format_literal (ans, a, mstr, 1, 12);
1073 }
1074
// Abbreviated English weekday names, Sunday first (7 entries).
// format_week looks these up through format_literal with an offset of 0;
// formatDateString indexes the table directly with tm_wday.
static const char*  WStr_a[] = {
    "Sun", "Mon", "Tue", "Wed",
    "Thu", "Fri", "Sat"
};
1079
// Full English weekday names, Sunday first (7 entries).
// format_Week looks these up through format_literal with an offset of 0;
// formatDateString indexes the table directly with tm_wday.
static const char*  WStr[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
};
1084
1085 static void  format_week (ustring& ans, MNode* a, std::vector<ustring>& par) {
1086     format_literal (ans, a, WStr_a, 0, 7);
1087 }
1088
1089 static void  format_Week (ustring& ans, MNode* a, std::vector<ustring>& par) {
1090     format_literal (ans, a, WStr, 0, 7);
1091 }
1092
1093 ustring  formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
1094     ustring  ans;
1095     uiterator  b, e;
1096     umatch  m;
1097     u_int  i;
1098     MNode*  a;
1099     static uregex  re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
1100     static struct {
1101         const char* name;
1102         size_t  namelen;
1103         void  (*fn)(ustring& ans, MNode* a, std::vector<ustring>& par);
1104     }  formatFunc[] = {
1105         {CharConst ("hex"), format_hex},
1106         {CharConst ("HEX"), format_HEX},
1107         {CharConst ("int"), format_int},
1108         {CharConst ("int0"), format_int0},
1109         {CharConst ("float"), format_float},
1110         {CharConst ("string"), format_string},
1111         {CharConst ("month"), format_month},
1112         {CharConst ("Month"), format_Month},
1113         {CharConst ("week"), format_week},
1114         {CharConst ("Week"), format_Week},
1115         {NULL, 0, NULL}
1116     };
1117
1118     b = format.begin ();
1119     e = format.end ();
1120     while (usearch (b, e, m, re)) {
1121         ans.append (b, m[0].first);
1122         b = m[0].second;
1123         i = strtoul (ustring (m[1].first, m[1].second)) - 1;
1124         if (i < par.size ()) {
1125             a = par[i] ();
1126         } else {
1127             a = NULL;
1128         }
1129         if (! m[2].matched) {
1130             if (a)
1131                 ans.append (to_string (a));
1132         } else {
1133             std::vector<ustring>  fpar;
1134             int  i;
1135             if (m[4].matched)
1136                 split (m[5].first, m[5].second, re_colon, fpar);
1137             for (i = 0; formatFunc[i].name; i ++) {
1138                 if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) {
1139                     (*formatFunc[i].fn) (ans, a, fpar);
1140                     goto Bp1;
1141                 }
1142             }
1143             ans.append (m[0].first, m[0].second);
1144         Bp1:;
1145         }
1146     }
1147     ans.append (b, e);
1148
1149     return ans;
1150 }
1151
1152 static ustring  colpad0 (int n, const ustring& src) {
1153     int  m;
1154
1155     if (n > 0) {
1156         n = std::min (32, n);
1157         m = n - src.length ();
1158         if (m > 0) {
1159             ustring  ans;
1160             ans.reserve (n);
1161             ans.append (m, '0');
1162             ans.append (src);
1163             return ans;
1164         } else if (m == 0) {
1165             return src;
1166         } else {
1167             return ustring (src.end () - n, src.end ());
1168         }
1169     } else {
1170         return src;
1171     }
1172 }
1173
1174 /*
1175  ${Y:4}, ${Y:2}
1176  ${M:2}, ${M}, ${M:name}, ${M:ab}
1177  ${D:2}, ${D}
1178  ${h:2}, ${h}
1179  ${m:2}, ${m}
1180  ${s:2}, ${s}
1181  ${W}, ${w}
1182  ${o}
1183 */
1184 //ustring  formatDateString (const ustring& format, time_t tm) {
1185 ustring  formatDateString (const ustring& format, struct tm& v) {
1186     ustring  ans;
1187 //    struct tm  v;
1188     uiterator  b, e;
1189     umatch  m;
1190     int  pc;
1191 //    static uregex  re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
1192     static uregex  re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}");
1193     std::vector<ustring>  fpar;
1194
1195 //    localtime_r (&tm, &v);
1196     b = format.begin ();
1197     e = format.end ();
1198     while (usearch (b, e, m, re)) {
1199         ans.append (b, m[0].first);
1200         b = m[0].second;
1201         if (m[5].matched) {
1202             if (m[6].matched) { // name
1203                 ans.append (mstr[v.tm_mon]);
1204             } else if (m[7].matched || m[8].matched) { // abname
1205                 ans.append (mstr_a[v.tm_mon]);
1206             }
1207         } else {
1208 //          if (m[2].matched) {
1209             if (m[3].matched) {
1210 //              pc = strtol (ustring (m[3].first, m[3].second));
1211                 pc = strtol (ustring (m[4].first, m[4].second));
1212             } else {
1213                 pc = 0;
1214             }
1215 //          switch (*m[1].first) {
1216             switch (*m[2].first) {
1217             case 'Y':
1218                 ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900)));
1219                 break;
1220             case 'M':
1221                 ans.append (colpad0 (pc, to_ustring (v.tm_mon + 1)));
1222                 break;
1223             case 'D':
1224                 ans.append (colpad0 (pc, to_ustring (v.tm_mday)));
1225                 break;
1226             case 'h':
1227                 ans.append (colpad0 (pc, to_ustring (v.tm_hour)));
1228                 break;
1229             case 'm':
1230                 ans.append (colpad0 (pc, to_ustring (v.tm_min)));
1231                 break;
1232             case 's':
1233                 ans.append (colpad0 (pc, to_ustring (v.tm_sec)));
1234                 break;
1235             case 'W':
1236                 ans.append (WStr [v.tm_wday]);
1237                 break;
1238             case 'w':
1239                 ans.append (WStr_a [v.tm_wday]);
1240                 break;
1241             case 'o':
1242                 {
1243                     int  h, m;
1244                     if (v.tm_gmtoff < 0) {
1245                         h = - v.tm_gmtoff / 60;
1246                         m = h % 60;
1247                         h = h / 60;
1248                         ans.append (CharConst ("-")).append (colpad0 (4, to_ustring (h * 100 + m)));
1249                     } else {
1250                         h = v.tm_gmtoff / 60;
1251                         m = h % 60;
1252                         h = h / 60;
1253                         ans.append (CharConst ("+")).append (colpad0 (4, to_ustring (h * 100 + m)));
1254                     }
1255                 }
1256                 break;
1257             }
1258         }
1259     }
1260     ans.append (b, e);
1261
1262     return ans;
1263 }
1264
1265 ustring  toLower (const ustring& str) {
1266     return boost::to_lower_copy (str);
1267 }
1268
1269 ustring  toUpper (const ustring& str) {
1270     return boost::to_upper_copy (str);
1271 }
1272
1273 ustring  hexEncode (const ustring& data) {
1274     ustring  ans;
1275     uiterator  b, e;
1276
1277     ans.reserve (data.length () * 2);
1278     b = data.begin ();
1279     e = data.end ();
1280     for (; b < e; b ++) {
1281         ans.append (1, hexchar ((*b >> 4) & 0x0f));
1282         ans.append (1, hexchar (*b & 0x0f));
1283     }
1284     return ans;
1285 }