lib/util_string.cc

   1 #include "util_string.h"
   2 #include "util_const.h"
   3 #include "util_random.h"
   4 #include "ml.h"
   5 #include "mlenv.h"
   6 #include "motorenv.h"
   7 #include "ustring.h"
   8 #include "utf8.h"
   9 #include "utf16.h"
  10 #include <boost/regex.hpp>
  11 #include <boost/regex/pattern_except.hpp>
  12 #include <iconv.h>
  13 #include <vector>
  14 #include <algorithm>
  15 #include <stdlib.h>
  16 #include <unistd.h>
  17 #include <string.h>
  18 #include <float.h>
  19 #include <ctype.h>
  20
  21 ustring  c3 (const ustring& str) {
  22     bool  qsign = false;
  23     static uregex  re ("^[0-9]+");
  24     uiterator  b, e;
  25     umatch  m;
  26
  27     b = str.begin ();
  28     e = str.end ();
  29     if (str[0] == '-' || str[0] == '+') {
  30         qsign = true;
  31         b = b + 1;
  32     }
  33     if (usearch (b, e, m, re)) {
  34         int  n = m[0].second - m[0].first;
  35         int  l = str.size () + n / 3;
  36         ustring  ans;
  37
  38         ans.reserve (l);
  39         if (qsign) {
  40             ans.append (1, str[0]);
  41         }
  42         for (; b != m[0].second; b ++) {
  43             ans.append (1, *b);
  44             if (n > 1 && n % 3 == 1) {
  45                 ans.append (CharConst (","));
  46             }
  47             n --;
  48         }
  49         for (; b != e; b ++) {
  50             ans.append (1, *b);
  51         }
  52         return ans;
  53     } else {
  54         return str;
  55     }
  56 }
  57
  58 ustring  to_ustring (double val) {
  59     char  b[32];
  60     return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
  61 }
  62
  63 static int  hex (char c) {
  64     if ('0' <= c && c <= '9') {
  65         return (c - '0');
  66     } else if ('a' <= c && c <= 'f') {
  67         return (c -  'a' + 10);
  68     } else if ('A' <= c && c <= 'F') {
  69         return (c - 'A' + 10);
  70     } else {
  71         return 0;
  72     }
  73 }
  74
  75 static int  hex (char c1, char c2) {
  76     return (hex (c1) * 16 + hex (c2));
  77 }
  78
  79 static char  hexchar (int c) {
  80     if (0 <= c && c <= 9)
  81         return '0' + c;
  82     else if (10 <= c <= 15)
  83         return 'a' - 10 + c;
  84     else
  85         return '0';
  86 }
  87
  88 static ustring  percentHex (int c) {
  89     ustring  ans (3, '%');
  90
  91     ans[1] = hexchar ((c >> 4) & 0x0f);
  92     ans[2] = hexchar (c & 0x0f);
  93     return ans;
  94 }
  95
  96 ustring  urldecode_nonul (const ustring& str) {
  97     ustring  ans;
  98     static uregex  re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
  99     umatch  m;
 100     uiterator  b, e;
 101
 102     ans.reserve (str.size ());
 103     b = str.begin ();
 104     e = str.end ();
 105     while (usearch (b, e, m, re)) {
 106         if (b != m[0].first) {
 107             ans.append (b, m[0].first);
 108         }
 109         if (m[1].matched) {
 110             ans.append (1, ' ');
 111         } else if (m[2].matched) {
 112             int  v = hex (*(m[2].first), *(m[2].first + 1));
 113             if (v != 0)
 114                 ans.append (1, v);
 115         } else {
 116         }
 117         b = m[0].second;
 118     }
 119     if (b != e) {
 120         ans.append (b, e);
 121     }
 122
 123     return ans;
 124 }
 125
 126 static ustring  omitPattern (const ustring& text, uregex& re) {
 127     Splitter  sp (text, re);
 128
 129     if (sp.next ()) {
 130         if (sp.match (0)) {
 131             ustring  ans;
 132             ans.reserve (text.length ());
 133             if (sp.begin () != sp.end ())
 134                 ans.append (sp.begin (), sp.end ());
 135             while (sp.next ()) {
 136                 if (sp.begin () != sp.end ())
 137                     ans.append (sp.begin (), sp.end ());
 138             }
 139             return ans;
 140         } else {
 141             return text;
 142         }
 143     } else {
 144         return text;
 145     }
 146 }
 147
 148 ustring  omitCtrl (const ustring& str) {
 149     static uregex  re ("[\\x00-\\x1f\\x7f]+");
 150     return omitPattern (str, re);
 151 }
 152
 153 ustring  omitNul (const ustring& str) {
 154     static uregex  re ("[\\x00]+");
 155     return omitPattern (str, re);
 156 }
 157
 158 ustring  omitNL (const ustring& str) {
 159     return omitPattern (str, re_nl);
 160 }
 161
 162 ustring  omitNonAscii (const ustring& str) {
 163     static uregex  re ("[^ -\\x7e]+");
 164     return omitPattern (str, re);
 165 }
 166
 167 ustring  omitNonAsciiWord (const ustring& str) {
 168     static uregex  re ("[^\\x21-\\x7e]+");
 169     return omitPattern (str, re);
 170 }
 171
 172 bool  to_bool (const ustring& v) {
 173     if (v.length () == 0 || (v.length () == 1 && v[0] == '0')) {
 174         return false;
 175     } else {
 176         return true;
 177     }
 178 }
 179
 180 static ustring  percentEncode (const ustring& text, uregex& re) {
 181     /* $1 -> _
 182        $2 -> %HEX
 183     */
 184     umatch  m;
 185     uiterator  b, e;
 186     ustring  ans;
 187
 188     b = text.begin ();
 189     e = text.end ();
 190     if (b != e && usearch (b, e, m, re)) {
 191         if (b != m[0].first) {
 192             ans.append (ustring (b, m[0].first));
 193         }
 194         if (m[1].matched) {
 195             ans.append (uUScore);
 196         } else if (m[2].matched) {
 197             ans.append (percentHex (*m[2].first));
 198         } else {
 199             assert (0);
 200         }
 201         b = m[0].second;
 202         while (b != e && usearch (b, e, m, re)) {
 203             if (b != m[0].first) {
 204                 ans.append (ustring (b, m[0].first));
 205             }
 206             if (m[1].matched) {
 207                 ans.append (uUScore);
 208             } else if (m[2].matched) {
 209                 ans.append (percentHex (*m[2].first));
 210             } else {
 211                 assert (0);
 212             }
 213             b = m[0].second;
 214         }
 215         if (b != e) {
 216             ans.append (ustring (b, e));
 217         }
 218         return ans;
 219     } else {
 220         return text;
 221     }
 222 }
 223
 224 ustring  urlencode (const ustring& url) {
 225     static uregex  re ("(\\x00)|([^a-zA-Z0-9_.,/-])");
 226
 227     return percentEncode (url, re);
 228 }
 229
 230 ustring  cookieencode (const ustring& text) {
 231     static uregex  re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
 232
 233     return percentEncode (text, re);
 234 }
 235
 236 ustring  cookiedecode (const ustring& text) {
 237     umatch  m;
 238     uiterator  b, e;
 239     ustring  ans;
 240     int  a;
 241     static uregex  re ("%([0-9a-fA-F])([0-9a-fA-F])");
 242
 243     b = text.begin ();
 244     e = text.end ();
 245     while (usearch (b, e, m, re)) {
 246         if (b != m[0].first)
 247             ans.append (ustring (b, m[0].first));
 248         a = hex (*m[1].first, *m[2].first);
 249         ans.append (1, a);
 250         b = m[0].second;
 251     }
 252     if (b != e)
 253         ans.append (ustring (b, e));
 254
 255     return ans;
 256 }
 257
 258 ustring  clipColon (const ustring& text) {
 259     int  i;
 260     ustring  ans (text);
 261
 262     for (i = 0; i < ans.size (); i ++) {
 263         if (ans[i] == ':')
 264             ans[i] = '_';
 265     }
 266     return ans;
 267 }
 268
 269 ustring  dirPart (char* path) {
 270     char*  e = rindex (path, '/');
 271
 272     if (e && e != path) {
 273         return ustring (path, e - path);
 274     } else {
 275         return uSlash;
 276     }
 277 }
 278
 279 ustring  dirPart (const ustring& path) {
 280     ustring::size_type  s = path.rfind ('/', path.size ());
 281
 282     if (s == ustring::npos) {
 283         return uSlash;
 284     } else {
 285         return ustring (path.begin (), path.begin () + s);
 286     }
 287 }
 288
 289 ustring  filePart_osSafe (const ustring& path) {
 290     umatch  m;
 291     static uregex  re ("[^\\\\/]+$");
 292
 293     if (usearch (path, m, re)) {
 294         return ustring (m[0].first, m[0].second);
 295     } else {
 296         return uEmpty;
 297     }
 298 }
 299
 300 void  split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
 301     Splitter  sp (b, e, re);
 302
 303     while (sp.next ()) {
 304         ans.push_back (sp.cur ());
 305     }
 306 }
 307
 308 bool  splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1) {
 309     for (; b < e; b ++) {
 310         if (*b == ch) {
 311             m1 = b;
 312             return true;
 313         }
 314     }
 315     m1 = e;
 316     return false;
 317 }
 318
 319 static char  Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 320 ustring  base64Encode (uiterator b, uiterator e) {
 321     ustring  ans;
 322     size_t  size;
 323     int  c0, c1, c2;
 324
 325     while (b != e) {
 326         size = e - b;
 327         if (size >= 3) {
 328             c0 = *b ++;
 329             c1 = *b ++;
 330             c2 = *b ++;
 331             ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
 332             ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
 333             ans.append (1, Base64Char[((c1 & 0x0f) << 2) | ((c2 >> 6) & 0x03)]);
 334             ans.append (1, Base64Char[c2 & 0x3f]);
 335         } else if (size == 2) {
 336             c0 = *b ++;
 337             c1 = *b ++;
 338             ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
 339             ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
 340             ans.append (1, Base64Char[((c1 & 0x0f) << 2)]);
 341             ans.append (1, '=');
 342         } else if (size == 1) {
 343             c0 = *b ++;
 344             ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
 345             ans.append (1, Base64Char[((c0 & 0x03) << 4)]);
 346             ans.append (1, '=');
 347             ans.append (1, '=');
 348         } else {
 349             break;
 350         }
 351     }
 352     return ans;
 353 }
 354
 355 ustring  escape_re (const ustring& text) {
 356     ustring::const_iterator  b, e;
 357     umatch  m;
 358     ustring  ans;
 359     int  c;
 360     char  buf[4];
 361     static uregex  re ("[^\\x01- !\"#%',/0-9:;<=>@A-Z_`a-z~\\x7f-\\xff-]");
 362
 363     buf[0] = '\\';
 364     buf[1] = 'x';
 365     ans.reserve (text.size () + 16);
 366     b = text.begin ();
 367     e = text.end ();
 368     while (b != e && usearch (b, e, m, re)) {
 369         if (b != m[0].first)
 370             ans.append (b, m[0].first);
 371         c = *m[0].first;
 372         buf[2] = hexchar ((c >> 4) & 0x0f);
 373         buf[3] = hexchar (c & 0x0f);
 374         ans.append (buf, 4);
 375         b = m[0].second;
 376     }
 377     if (b != e)
 378         ans.append (b, e);
 379     return ans;
 380 }
 381
 382 ustring  slashEncode (const ustring& text) {
 383     ustring::const_iterator  b, e;
 384     umatch  m;
 385     ustring  ans;
 386     int  c;
 387     char  buf[4];
 388     static uregex  re ("([\\x00-\\x1f\\x7f])|(\\\\)|(\")");
 389
 390     buf[0] = '\\';
 391     buf[1] = 'x';
 392     b = text.begin ();
 393     e = text.end ();
 394     while (b != e && usearch (b, e, m, re)) {
 395         if (b != m[0].first)
 396             ans.append (b, m[0].first);
 397         if (m[1].matched) {
 398             c = *m[0].first;
 399             switch (c) {
 400             case '\t':
 401                 ans.append (CharConst ("\\t"));
 402                 break;
 403             case '\r':
 404                 ans.append (CharConst ("\\r"));
 405                 break;
 406             case '\n':
 407                 ans.append (CharConst ("\\n"));
 408                 break;
 409             default:
 410                 buf[2] = hexchar ((c >> 4) & 0x0f);
 411                 buf[3] = hexchar (c & 0x0f);
 412                 ans.append (buf, 4);
 413             }
 414         } else if (m[2].matched) {
 415             ans.append (CharConst ("\\\\"));
 416         } else if (m[3].matched) {
 417             ans.append (CharConst ("\\\""));
 418         } else {
 419             assert (0);
 420         }
 421         b = m[0].second;
 422     }
 423     if (b != e)
 424         ans.append (b, e);
 425     return ans;
 426 }
 427
 428 ustring  slashDecode (const ustring& text) {
 429     ustring::const_iterator  b, e;
 430     umatch  m;
 431     ustring  ans;
 432     int  c;
 433     static uregex  re ("\\\\([0-7][0-7][0-7]|[\\x00-\\x7f])");
 434
 435     b = text.begin ();
 436     e = text.end ();
 437     while (b != e && usearch (b, e, m, re)) {
 438         if (b != m[0].first)
 439             ans.append (b, m[0].first);
 440         b = m[0].first + 1;
 441         c = *b;
 442         switch (c) {
 443         case 't':
 444             ans.append (CharConst ("\t"));
 445             break;
 446         case 'r':
 447             ans.append (CharConst ("\r"));
 448             break;
 449         case 'n':
 450             ans.append (CharConst ("\n"));
 451             break;
 452         default:
 453             if (m[0].second - m[0].first == 4) {
 454                 c = (c - '0') * 64;
 455                 b ++;
 456                 c += (*b - '0') * 8;
 457                 b ++;
 458                 c += *b - '0';
 459                 if (0 < c && c < 0x20)
 460                     ans.append (1, c);
 461             } else {
 462                 ans.append (1, c);
 463             }
 464         }
 465         b = m[0].second;
 466     }
 467     if (b != e)
 468         ans.append (b, e);
 469     return ans;
 470 }
 471
 472 unsigned long  strtoul (const ustring& str) {
 473     return strtoul (str.c_str (), NULL, 10);
 474 }
 475
 476 unsigned long  strtoul (const uiterator& b) {
 477     return strtoul (&*b, NULL, 10);
 478 }
 479
 480 long  strtol (const ustring& str) {
 481     return strtol (str.c_str (), NULL, 10);
 482 }
 483
 484 double  strtod (const ustring& str) {
 485     return strtod (str.c_str (), NULL);
 486 }
 487
 488 bool  passMatch (const ustring& pass, const ustring& cpass) {
 489     if (pass.length () == 0 || cpass.length () == 0)
 490         return false;
 491     return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
 492 }
 493
 494 ustring  passCrypt (const ustring& pass) {
 495     ustring  salt = makeSalt ();
 496     return ustring (crypt (pass.c_str (), salt.c_str ()));
 497 }
 498
 499 size_t  strLength (const ustring& src) {
 500     uiterator  b, e;
 501     size_t  n = 0;
 502     b = src.begin ();
 503     e = src.end ();
 504     while (b < e) {
 505         n ++;
 506         nextChar (b, e);
 507     }
 508     return n;
 509 }
 510
 511 void  substring (const ustring& src, size_t idx, size_t len, int flen, ustring& ans) {
 512     uiterator  b, e, t;
 513     size_t  i;
 514
 515     b = src.begin ();
 516     e = src.end ();
 517     for (i = 0; i < idx && b < e; i ++)
 518         nextChar (b, e);
 519     if (flen) {
 520         t = b;
 521         for (i = 0; i < len && t < e; i ++)
 522             nextChar (t, e);
 523         ans.assign (b, t);
 524     } else {
 525         ans.assign (b, e);
 526     }
 527 }
 528
// Lookup table indexed by ASCII code (0..127).  A true entry marks a
// character that jsEncode copies through verbatim; every other character is
// written as a \uXXXX escape.  The true entries are: space, '-', '.', '/',
// '0'-'9', ':', ';', '=', 'A'-'Z', '_' and 'a'-'z'.
static bool  jssafe[] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 0--15
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,            // 16--31
    1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,            // 32--47
    1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,            // 48--63
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 64--79
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,            // 80--95
    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,            // 96--111
    1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,            // 112--127
};
 539
 540 ustring  jsEncode (const ustring& str) {
 541     int  i;
 542     ustring  u, ans;
 543     int  c, d;
 544     char  b[8];
 545
 546     u = utf8to16 (str);
 547     ans.reserve (u.size () * 3);
 548     b[0] = '\\';
 549     b[1] = 'u';
 550     for (i = 0; i < u.size (); i += 2) {
 551         c = u[i];
 552         d = u[i + 1];
 553         if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
 554             ans.append (1, d);
 555         } else {
 556             b[2] = hexchar ((c >> 4) & 0x0f);
 557             b[3] = hexchar (c & 0x0f);
 558             b[4] = hexchar ((d >> 4) & 0x0f);
 559             b[5] = hexchar (d & 0x0f);
 560             ans.append (b, 6);
 561         }
 562     }
 563     return ans;
 564 }
 565
 566 ustring  filenameEncode (const ustring& text) {
 567     static uregex  re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
 568     Splitter  sp (text, re);
 569     ustring  ans;
 570     int  c;
 571
 572     if (text.length () == 0) {
 573         throw (ustring (text).append (uErrorBadName));
 574     }
 575     ans.reserve (text.length () + 16);
 576     while (sp.next ()) {
 577         if (sp.begin () < sp.end ())
 578             ans.append (sp.begin (), sp.end ());
 579         if (sp.match (1)) {
 580         } else if (sp.match (2)) {
 581             c = *sp.matchBegin (2);
 582             ans.append (1, ':');
 583             ans.append (1, hexchar ((c >> 4) & 0x0f));
 584             ans.append (1, hexchar (c & 0x0f));
 585         } else if (sp.match (3)) {
 586             for (c = sp.matchEnd (3) - sp.matchBegin (3); c > 0; c --) {
 587                 ans.append (CharConst (":2e"));
 588             }
 589         }
 590     }
 591     if (ans.length () > 250)
 592         ans.resize (250);
 593     return ans;
 594 }
 595
 596 ustring  filenameDecode (const ustring& text) {
 597     static uregex  re (":([0-9a-fA-F][0-9a-fA-F])");
 598     Splitter  sp (text, re);
 599     ustring  ans;
 600     int  c;
 601
 602     ans.reserve (text.length ());
 603     while (sp.next ()) {
 604         if (sp.begin () < sp.end ())
 605             ans.append (sp.begin (), sp.end ());
 606         if (sp.match (1)) {
 607             c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
 608             if (32 <= c && c < 256)
 609                 ans.append (1, c);
 610         }
 611     }
 612     return ans;
 613 }
 614
 615 bool  matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
 616     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 617         b += s;
 618         return true;
 619     } else {
 620         return false;
 621     }
 622 }
 623
 624 bool  matchHead (uiterator& b, uiterator e, const char* t, size_t s) {
 625     if (e - b >= s && memcmp (t, &b[0], s) == 0) {
 626         return true;
 627     } else {
 628         return false;
 629     }
 630 }
 631
 632 bool  matchHead (const ustring& str, const char* t, size_t s) {
 633     if (str.length () >= s && memcmp (t, &*str.begin (), s) == 0) {
 634         return true;
 635     } else {
 636         return false;
 637     }
 638 }
 639
 640 bool  matchHead (const ustring& str, const ustring& head) {
 641     if (str.length () >= head.length () && memcmp (&*str.begin (), &*head.begin (), head.length ()) == 0) {
 642         return true;
 643     } else {
 644         return false;
 645     }
 646 }
 647
 648 bool  match (uiterator b, uiterator e, const char* t, size_t s) {
 649     if (e - b == s && memcmp (t, &b[0], s) == 0) {
 650         return true;
 651     } else {
 652         return false;
 653     }
 654 }
 655
 656 bool  match (const ustring& str, const char* t, size_t s) {
 657     if (str.length () == s && memcmp (t, str.data (), s) == 0) {
 658         return true;
 659     } else {
 660         return false;
 661     }
 662 }
 663
 664 bool  match (uiterator b, uiterator e, const ustring& str) {
 665     if (e - b == str.length () && memcmp (str.data (), &b[0], str.length ()) == 0) {
 666         return true;
 667     } else {
 668         return false;
 669     }
 670 }
 671
 672 bool  match (const ustring& str, const char* t, size_t s, const char* t2, size_t s2) {
 673     if (match (str, t, s) || match (str, t2, s2)) {
 674         return true;
 675     } else {
 676         return false;
 677     }
 678 }
 679
 680 ustring  clipWhite (uiterator b, uiterator e) {
 681     while (b < e)
 682         if (isblank (*b)) {
 683             b ++;
 684         } else {
 685             break;
 686         }
 687     while (b < e)
 688         if (isblank (*(e - 1))) {
 689             e --;
 690         } else {
 691             break;
 692         }
 693     return ustring (b, e);
 694 }
 695 ustring  clipWhite (const ustring& str) {
 696     return clipWhite (str.begin (), str.end ());
 697 }
 698
 699 ustring  getenvString (const char* key) {
 700     char*  e = getenv (key);
 701     if (e) {
 702         return ustring (e);
 703     } else {
 704         return uEmpty;
 705     }
 706 }
 707
 708 ustring  zeroPad (int n, const ustring& src) {
 709     int  m;
 710
 711     n = std::min (32, n);
 712     m = n - src.length ();
 713     if (m > 0) {
 714         ustring  ans;
 715         ans.reserve (m);
 716         ans.append (m, '0');
 717         ans.append (src);
 718         return ans;
 719     } else {
 720         return src;
 721     }
 722 }
 723
 724 bool  wsearch (const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
 725     try {
 726         std::wstring  wtext = utow (text);
 727         std::wstring  wreg = utow (reg);
 728         boost::wregex  wre (wreg, reg_flags);
 729         return regex_search (wtext, m, wre, search_flags);
 730     } catch (boost::regex_error& err) {
 731         throw (uErrorRegexp);
 732     }
 733 }
 734
 735 bool  wsearch_env (MlEnv* mlenv, const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
 736     try {
 737         mlenv->env->regtext = utow (text);
 738         std::wstring  wreg = utow (reg);
 739         boost::wregex  wre (wreg, reg_flags);
 740         return regex_search (mlenv->env->regtext, m, wre, search_flags);
 741     } catch (boost::regex_error& err) {
 742         throw (uErrorRegexp);
 743     }
 744 }
 745
 746 ustring  uiconv (const ustring& src, const char* tocode, const char* fromcode) {
 747     iconv_t  cd;
 748     char  buf[4096];
 749     const char*  ibuf;
 750     char*  obuf;
 751     size_t  isize, osize, rsize;
 752     ustring  ans;
 753
 754     cd = iconv_open (tocode, fromcode);
 755     if (cd == (iconv_t)(-1))
 756         throw (ustring ("bad encoding name."));
 757     ibuf = &src.at (0);
 758     isize = src.size ();
 759     while (isize > 0) {
 760         obuf = buf;
 761         osize = 4096;
 762         rsize = iconv (cd, &ibuf, &isize, &obuf, &osize);
 763 //      if (rsize < 0)
 764         if (obuf - buf <= 0)
 765             break;
 766         ans.append (buf, obuf - buf);
 767     }
 768     iconv_close (cd);
 769     return ans;
 770 }
 771
 772 ustring  padEmpty (const ustring& name) {
 773     if (name.empty ())
 774         return ustring (CharConst ("(null)"));
 775     else
 776         return name;
 777 }
 778
 779 uint32_t  hextoul (uiterator b, uiterator e) {
 780     uint32_t  ans = 0;
 781     int  n;
 782
 783     for (n = 0; n < 8 && b != e; n ++, b ++) {
 784         ans = (ans << 4) + hex (*b);
 785     }
 786     return ans;
 787 }
 788
 789 ustring  toCRLF (const ustring& str) {
 790     uiterator  b = str.begin ();
 791     uiterator  e = str.end ();
 792     umatch  m;
 793     ustring  ans;
 794
 795     while (usearch (b, e, m, re_lf)) {
 796         ans.append (b, m[0].first).append (uCRLF);
 797         b = m[0].second;
 798     }
 799     ans.append (b, e);
 800     return ans;
 801 }
 802
 803 void  skipSpace (uiterator& b, uiterator e) {
 804     while (b < e && *b == ' ') {
 805         b ++;
 806     }
 807 }
 808
 809 static ustring::value_type  toLower_ustring_value (ustring::value_type v) {
 810     if ('A' <= v && v <= 'Z') {
 811         return v - 'A' + 'a';
 812     } else {
 813         return v;
 814     }
 815 }
 816
// In-place variant of toLower; excluded from compilation by the #if 0 guard.
#if 0
void  toLower (ustring::iterator* b, ustring::iterator* e) {
    transform (*b, *e, *b, toLower_ustring_value);
}
#endif
 822
 823 ustring  toLower (uiterator b, uiterator e) {
 824     ustring::iterator  i;
 825     ustring  ans;
 826     ans.resize (e - b);
 827     i = ans.begin ();
 828     for (; b < e; b ++, i++) {
 829         *i = toLower_ustring_value (*b);
 830     }
 831     return ans;
 832 }
 833
 834 static void  format_hex (ustring& ans, MNode* a, std::vector<ustring>& par, bool fcap) {
 835     uint32_t  v = 0;
 836     char  buf[32];
 837
 838     if (a)
 839         v = to_int (a);
 840
 841     if (par.size () > 0) {
 842         int  p = strtol (par[0]);
 843         if (p < 0)
 844             p = 1;
 845         if (p > 20)
 846             p = 20;
 847         if (fcap)
 848             ans.append (buf, snprintf (buf, 32, "%.*X", p, v));
 849         else
 850             ans.append (buf, snprintf (buf, 32, "%.*x", p, v));
 851     } else {
 852         if (fcap)
 853             ans.append (buf, snprintf (buf, 32, "%X", v));
 854         else
 855             ans.append (buf, snprintf (buf, 32, "%x", v));
 856     }
 857 }
 858
 859 static void  format_hex (ustring& ans, MNode* a, std::vector<ustring>& par) {
 860     format_hex (ans, a, par, false);
 861 }
 862
 863 static void  format_HEX (ustring& ans, MNode* a, std::vector<ustring>& par) {
 864     format_hex (ans, a, par, true);
 865 }
 866
 867 static void  format_int_sub (ustring& ans, MNode* a, std::vector<ustring>& par, bool pad0 = false) {
 868     int32_t  v = 0;
 869     char  buf[32];
 870     size_t  s;
 871
 872     if (a)
 873         v = to_int (a);
 874
 875     if (par.size () > 0) {
 876         bool  fclip = false;
 877         bool  fzero = pad0;
 878         bool  fc3 = false;
 879         if (match (par[0], CharConst ("comma")) || match (par[0], CharConst ("c"))) {
 880             ans.append (c3 (to_ustring (v)));
 881         } else {
 882             int  p = strtol (par[0]);
 883             if (p < 0)
 884                 p = 1;
 885             if (p > 20)
 886                 p = 20;
 887             for (int i = 1; i < par.size (); i ++) {
 888                 if (match (par[i], CharConst ("clip"))) {
 889                     fclip = true;
 890                 } else if (match (par[i], CharConst ("0"))) {
 891                     fzero = true;
 892                 } else if (match (par[i], CharConst ("comma")) || match (par[i], CharConst ("c"))) {
 893                     fc3 = true;
 894                 } else {
 895                     throw (par[i] + uErrorBadParam);
 896                 }
 897             }
 898             if (fzero)
 899                 s = snprintf (buf, 32, "%.*d", p, v);
 900             else
 901                 s = snprintf (buf, 32, "%*d", p, v);
 902             if (fclip && s > p)
 903                 ans.append (buf + s - p, p);
 904             else if (! fclip && fc3)
 905                 ans.append (c3 (ustring (buf, s)));
 906             else
 907                 ans.append (buf, s);
 908         }
 909     } else {
 910         ans.append (to_ustring (v));
 911     }
 912 }
 913
 914 static void  format_int (ustring& ans, MNode* a, std::vector<ustring>& par) {
 915     format_int_sub (ans, a, par);
 916 }
 917
 918 static void  format_int0 (ustring& ans, MNode* a, std::vector<ustring>& par) {
 919     format_int_sub (ans, a, par, true);
 920 }
 921
 922 static void  format_int (ustring& ans, MNode* a, int c, bool pad0 = false) {
 923     int32_t  v = 0;
 924     char  buf[32];
 925     size_t  s;
 926
 927     if (a)
 928         v = to_int (a);
 929
 930     if (c > 0) {
 931         if (c > 20)
 932             c = 20;
 933         if (pad0)
 934             s = snprintf (buf, 32, "%.*d", c, v);
 935         else
 936             s = snprintf (buf, 32, "%*d", c, v);
 937         if (s > c)
 938             ans.append (buf + s - c, c);
 939         else
 940             ans.append (buf, s);
 941     } else {
 942         ans.append (to_ustring (v));
 943     }
 944 }
 945
 946 static void  format_float (ustring& ans, MNode* a, std::vector<ustring>& par) {
 947     int  p1 = 0;
 948     int  p2 = 0;
 949     char  buf[32];
 950
 951     if (par.size () > 0)
 952         p1 = strtol (par[0]);
 953     if (par.size () > 1)
 954         p2 = strtol (par[1]);
 955     if (p1 < 0)
 956         p1 = 0;
 957     if (p2 < 0)
 958         p2 = 0;
 959     if (p1 > 20)
 960         p1 = 20;
 961     if (p2 > 20)
 962         p2 = 20;
 963     ans.append (buf, snprintf (buf, 32, "%*.*lf", p1, p2, to_double (a)));
 964 }
 965
 966 static void  format_string (ustring& ans, MNode* a, std::vector<ustring>& par) {
 967     int  p = 0;
 968     bool  fright = false;
 969     ustring  u = to_string (a);
 970
 971     if (par.size () > 0)
 972         p = strtol (par[0]);
 973     if (p > 65536)
 974         p = 65536;
 975     if (par.size () > 1) {
 976         if (match (par[1], CharConst ("right")) || match (par[1], CharConst ("r")))
 977             fright = true;
 978         else
 979             throw (par[1] + uErrorBadParam);
 980     }
 981     if (fright) {
 982         if (u.size () < p)
 983             ans.append (p - u.size (), ' ').append (u);
 984         else
 985             ans.append (u);
 986     } else {
 987         if (u.size () < p)
 988             ans.append (u).append (p - u.size (), ' ');
 989         else
 990             ans.append (u);
 991     }
 992 }
 993
 994 static void  format_literal (ustring& ans, MNode* a, const char* list[], int offset, size_t size) {
 995     int  v;
 996
 997     if (a) {
 998         v = to_int (a) - offset;
 999         if (0 <= v && v < size)
1000             ans.append (list[v]);
1001     }
1002 }
1003
1004 static void  format_month (ustring& ans, MNode* a, std::vector<ustring>& par) {
1005     static const char*  mstr_a[] = {
1006         "Jan", "Feb", "Mar", "Apr",
1007         "May", "Jun", "Jul", "Aug",
1008         "Sep", "Oct", "Nov", "Dec"
1009     };
1010     format_literal (ans, a, mstr_a, 1, 12);
1011 }
1012
1013 static void  format_Month (ustring& ans, MNode* a, std::vector<ustring>& par) {
1014     static const char*  mstr[] = {
1015         "January", "February", "March", "April",
1016         "May", "June", "July", "August",
1017         "September", "October", "November", "December"
1018     };
1019     format_literal (ans, a, mstr, 1, 12);
1020 }
1021
1022 static void  format_week (ustring& ans, MNode* a, std::vector<ustring>& par) {
1023     static const char*  wstr_a[] = {
1024         "Sun", "Mon", "Tue", "Wed",
1025         "Thu", "Fri", "Sat"
1026     };
1027     format_literal (ans, a, wstr_a, 0, 7);
1028 }
1029
1030 static void  format_Week (ustring& ans, MNode* a, std::vector<ustring>& par) {
1031     static const char*  wstr[] = {
1032         "Sunday", "Monday", "Tuesday", "Wednesday",
1033         "Thursday", "Friday", "Saturday"
1034     };
1035     format_literal (ans, a, wstr, 0, 7);
1036 }
1037
1038 ustring  formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
1039     ustring  ans;
1040     uiterator  b, e;
1041     umatch  m;
1042     u_int  i;
1043     MNode*  a;
1044     static uregex  re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
1045     static struct {
1046         const char* name;
1047         size_t  namelen;
1048         void  (*fn)(ustring& ans, MNode* a, std::vector<ustring>& par);
1049     }  formatFunc[] = {
1050         {CharConst ("hex"), format_hex},
1051         {CharConst ("HEX"), format_HEX},
1052         {CharConst ("int"), format_int},
1053         {CharConst ("int0"), format_int0},
1054         {CharConst ("float"), format_float},
1055         {CharConst ("string"), format_string},
1056         {CharConst ("month"), format_month},
1057         {CharConst ("Month"), format_Month},
1058         {CharConst ("week"), format_week},
1059         {CharConst ("Week"), format_Week},
1060         {NULL, 0, NULL}
1061     };
1062
1063     b = format.begin ();
1064     e = format.end ();
1065     while (usearch (b, e, m, re)) {
1066         ans.append (b, m[0].first);
1067         b = m[0].second;
1068         i = strtoul (ustring (m[1].first, m[1].second)) - 1;
1069         if (i < par.size ()) {
1070             a = par[i] ();
1071         } else {
1072             a = NULL;
1073         }
1074         if (! m[2].matched) {
1075             if (a)
1076                 ans.append (to_string (a));
1077         } else {
1078             std::vector<ustring>  fpar;
1079             int  i;
1080             if (m[4].matched)
1081                 split (m[5].first, m[5].second, re_colon, fpar);
1082             for (i = 0; formatFunc[i].name; i ++) {
1083                 if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) {
1084                     (*formatFunc[i].fn) (ans, a, fpar);
1085                     goto Bp1;
1086                 }
1087             }
1088             ans.append (m[0].first, m[0].second);
1089         Bp1:;
1090         }
1091     }
1092     ans.append (b, e);
1093
1094     return ans;
1095 }
1096
1097 /*
1098  ${Y:4}, ${Y:2}
1099  ${M:2}, ${M}
1100  ${D:2}, ${D}
1101  ${h:2}, ${h}
1102  ${m:2}, ${m}
1103  ${s:2}, ${s}
1104  ${W}, ${w}
1105 */
1106 ustring  formatDateString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
1107     ustring  ans;
1108     uiterator  b, e;
1109     umatch  m;
1110     u_int  i;
1111     MNode*  a;
1112     static uregex  re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
1113
1114     b = format.begin ();
1115     e = format.end ();
1116     while (usearch (b, e, m, re)) {
1117         std::vector<ustring>  fpar;
1118         ans.append (b, m[0].first);
1119         b = m[0].second;
1120         switch (*m[1].first) {
1121         case 'Y':
1122             a = par[0] ();
1123             break;
1124         case 'M':
1125             a = par[1] ();
1126             break;
1127         case 'D':
1128             a = par[2] ();
1129             break;
1130         case 'h':
1131             a = par[3] ();
1132             break;
1133         case 'm':
1134             a = par[4] ();
1135             break;
1136         case 's':
1137             a = par[5] ();
1138             break;
1139         case 'W':
1140         case 'w':
1141             a = par[6] ();
1142             break;
1143         default:
1144             a = NULL;
1145         }
1146
1147         if (! m[2].matched) {
1148             switch (*m[1].first) {
1149             case 'W':
1150                 format_Week (ans, a, fpar);
1151                 break;
1152             case 'w':
1153                 format_week (ans, a, fpar);
1154                 break;
1155             default:
1156                 if (a)
1157                     ans.append (to_string (a));
1158             }
1159         } else {
1160             format_int (ans, a, strtol (ustring (m[3].first, m[3].second)), true);
1161         }
1162     }
1163     ans.append (b, e);
1164
1165     return ans;
1166 }
1167