+#include "config.h"
#include "util_string.h"
#include "util_const.h"
#include "util_random.h"
-#include "ml.h"
-#include "mlenv.h"
-#include "motorenv.h"
+#include "util_splitter.h"
#include "ustring.h"
#include "utf8.h"
#include "utf16.h"
#include <boost/regex.hpp>
#include <boost/regex/pattern_except.hpp>
-#include <iconv.h>
+#include <boost/algorithm/string.hpp>
#include <vector>
#include <algorithm>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
+#include <time.h>
#include <float.h>
#include <ctype.h>
+// Open an iconv conversion descriptor for the given pair of encoding names and keep it in `cd`.
+// Throws a ustring of the form "<in>, <out>: unknown encoding." when the descriptor equals ICONV_ERR.
+// NOTE(review): iconv_open() is declared as (tocode, fromcode); `in` is passed in the first slot
+// here -- confirm the parameter names match the intended conversion direction.
+UIConv::UIConv (const char* in, const char* out) {
+ cd = iconv_open (in, out);
+ if (cd == ICONV_ERR) {
+ throw (ustring (in).append (CharConst (", ")).append (ustring (out)).append (CharConst (": unknown encoding.")));
+ }
+}
+
+// Convert `text` through the iconv descriptor `cd` and return the converted bytes.
+//
+//   text  : input bytes in the source encoding.
+//   flush : when true, iconv is additionally called with a NULL input so that any
+//           pending shift sequence is emitted and the descriptor state is reset.
+//
+// Returns an empty string when the descriptor is invalid (cd == ICONV_ERR).
+// A byte that cannot be converted (EILSEQ) is skipped and replaced by "_".
+// An incomplete multibyte sequence at the end of `text` (EINVAL) is dropped, because
+// this function has no way to carry the leftover bytes over to a later call.
+ustring UIConv::cv (const ustring& text, bool flush) {
+    ustring ans;
+
+    if (cd != ICONV_ERR) {
+        // Stack buffer: no heap allocation to release, and nothing to leak if append() throws.
+        char buf[4096];
+        const char* ibuf = text.data ();
+        size_t isize = text.size ();
+
+        while (isize > 0) {
+            char* obuf = buf;
+            size_t osize = sizeof (buf);
+            size_t rsize = ::iconv (cd, (char**)&ibuf, &isize, &obuf, &osize);
+            // Save errno immediately; the append below may allocate and change it.
+            int err = (rsize == (size_t)-1) ? errno : 0;
+            bool progressed = (obuf > buf);
+
+            // Emit whatever was converted before any replacement character so that
+            // the output stays in input order.
+            if (progressed)
+                ans.append (buf, obuf - buf);
+
+            if (err == 0) {
+                // Chunk fully converted; the loop condition decides whether to continue.
+            } else if (err == E2BIG) {
+                // Output buffer was full: go round again with an empty buffer.
+                // Bail out if nothing at all was produced, to rule out spinning.
+                if (! progressed)
+                    break;
+            } else if (err == EILSEQ) {
+                // Invalid input byte: skip it and leave a visible placeholder.
+                ++ ibuf;
+                -- isize;
+                ans.append (CharConst ("_"));
+            } else {
+                // EINVAL (truncated trailing sequence) makes no progress and would
+                // loop forever; any other error is equally unrecoverable here.
+                break;
+            }
+        }
+        if (flush) {
+            char* obuf = buf;
+            size_t osize = sizeof (buf);
+            ::iconv (cd, NULL, NULL, &obuf, &osize);
+            if (obuf > buf)
+                ans.append (buf, obuf - buf);
+        }
+    }
+    return ans;
+}
+
+///////////////////////////////////////////////////////////////////////
+// Predicate: true when `c` is an ASCII decimal digit ('0'..'9').
+static bool isDigit (int c) {
+ return '0' <= c && c <= '9';
+}
+
ustring c3 (const ustring& str) {
bool qsign = false;
- static uregex re ("^[0-9]+");
- uiterator b, e;
- umatch m;
+ uiterator b, e, t;
b = str.begin ();
e = str.end ();
qsign = true;
b = b + 1;
}
- if (usearch (b, e, m, re)) {
- int n = m[0].second - m[0].first;
+ t = b;
+ if (matchHeadFn (t, e, isDigit)) {
+ int n = t - b;
int l = str.size () + n / 3;
ustring ans;
-
ans.reserve (l);
if (qsign) {
ans.append (1, str[0]);
}
- for (; b != m[0].second; b ++) {
+ for (; b < t; ++ b) {
ans.append (1, *b);
if (n > 1 && n % 3 == 1) {
ans.append (CharConst (","));
}
}
+// Integer -> ustring conversions, one overload per integer width/signedness.
+// Each overload delegates to boost::lexical_cast<ustring>.
+// NOTE(review): <boost/lexical_cast.hpp> is not among the includes visible in this chunk;
+// presumably it arrives through another header -- confirm.
+ustring to_ustring (int32_t v) {
+ return boost::lexical_cast<ustring> (v);
+}
+
+ustring to_ustring (uint32_t v) {
+ return boost::lexical_cast<ustring> (v);
+}
+
+ustring to_ustring (long int v) {
+ return boost::lexical_cast<ustring> (v);
+}
+
+ustring to_ustring (unsigned long int v) {
+ return boost::lexical_cast<ustring> (v);
+}
+
+ustring to_ustring (long long int v) {
+ return boost::lexical_cast<ustring> (v);
+}
+
+ustring to_ustring (unsigned long long int v) {
+ return boost::lexical_cast<ustring> (v);
+}
+
ustring to_ustring (double val) {
char b[32];
return ustring (b, snprintf (b, 32, "%.*g", DBL_DIG, val));
}
+// ustring -> fixed-width integer conversions via boost::lexical_cast.
+// No error handling is done here: a string that lexical_cast rejects makes it throw
+// (boost::bad_lexical_cast per the Boost documentation), and that propagates to the caller.
+int32_t to_int32 (const ustring& v) {
+ return boost::lexical_cast<int32_t> (v);
+}
+
+uint32_t to_uint32 (const ustring& v) {
+ return boost::lexical_cast<uint32_t> (v);
+}
+
+uint64_t to_uint64 (const ustring& v) {
+ return boost::lexical_cast<uint64_t> (v);
+}
+
+// Value of a single hexadecimal digit character (either case), 0..15.
+// Returns -1 when `c` is not a hexadecimal digit, so callers can detect invalid input.
+static int shex (char c) {
+ if ('0' <= c && c <= '9') {
+ return (c - '0');
+ } else if ('a' <= c && c <= 'f') {
+ return (c - 'a' + 10);
+ } else if ('A' <= c && c <= 'F') {
+ return (c - 'A' + 10);
+ } else {
+ return -1;
+ }
+}
+
static int hex (char c) {
if ('0' <= c && c <= '9') {
return (c - '0');
static char hexchar (int c) {
if (0 <= c && c <= 9)
return '0' + c;
- else if (10 <= c <= 15)
+ else if (10 <= c && c <= 15)
return 'a' - 10 + c;
else
return '0';
}
+// Upper-case hexadecimal digit for a nibble value 0..15 ('0'..'9', 'A'..'F').
+// Any value outside 0..15 yields '0'.
+static char hexchar_c (int c) {
+ if (0 <= c && c <= 9)
+ return '0' + c;
+ else if (10 <= c && c <= 15)
+ return 'A' - 10 + c;
+ else
+ return '0';
+}
+
static ustring percentHex (int c) {
ustring ans (3, '%');
return ans;
}
+// Build the three-character percent escape "%XY" for the low 8 bits of `c`,
+// using upper-case hexadecimal digits.
+ustring percentHEX (int c) {
+ ustring ans (3, '%');
+
+ // ans[0] stays '%'; the two remaining positions receive the high and low nibble.
+ ans[1] = hexchar_c ((c >> 4) & 0x0f);
+ ans[2] = hexchar_c (c & 0x0f);
+ return ans;
+}
+
ustring urldecode_nonul (const ustring& str) {
ustring ans;
static uregex re ("(\\+)|%([0-9a-fA-F][0-9a-fA-F])|\\x00");
return ans;
}
-static ustring omitPattern (const ustring& text, uregex& re) {
- Splitter sp (text, re);
-
- if (sp.next ()) {
- if (sp.match (0)) {
- ustring ans;
- ans.reserve (text.length ());
- if (sp.begin () != sp.end ())
- ans.append (sp.begin (), sp.end ());
- while (sp.next ()) {
- if (sp.begin () != sp.end ())
- ans.append (sp.begin (), sp.end ());
- }
- return ans;
- } else {
- return text;
- }
- } else {
+// Return a copy of `text` with every character for which `fn` returns non-zero removed.
+// When no character matches, `text` itself is returned without building a new string.
+//
+// Each character is converted through unsigned char before it is handed to `fn`:
+// the <ctype.h> predicates (omitCtrl passes iscntrl) are undefined for negative
+// arguments other than EOF, which is what bytes >= 0x80 become when plain char is signed.
+static ustring omitPattern (const ustring& text, int (*fn)(int)) {
+ uiterator b = text.begin ();
+ uiterator e = text.end ();
+ uiterator p = b;
+ // Locate the first character to drop; everything before it can be copied in one piece.
+ for (; p < e; ++ p) {
+ if (fn (static_cast<unsigned char> (*p)))
+ break;
+ }
+ if (p == e) {
return text;
+ } else {
+ ustring ans;
+ ans.reserve (text.length ());
+ ans.assign (b, p);
+ // *p is known to match, so resume with the character after it.
+ ++ p;
+ for (; p < e; ++ p) {
+ if (! fn (static_cast<unsigned char> (*p)))
+ ans.append (1, *p);
+ }
+ return ans;
}
}
ustring omitCtrl (const ustring& str) {
- static uregex re ("[\\x00-\\x1f\\x7f]+");
- return omitPattern (str, re);
+ return omitPattern (str, iscntrl);
+}
+
+// Predicate for omitCtrlX: non-zero for the ASCII control characters 0x00-0x08,
+// 0x0b-0x1f and 0x7f. Horizontal tab (0x09) and line feed (0x0a) are deliberately
+// marked 0 in the table, as is every value outside 0..127.
+static int iscntrlx (int c) {
+ // One entry per ASCII code, 16 per row.
+ static char table_ctrlx[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ };
+ if (0 <= c && c < 128)
+ return table_ctrlx[c];
+ return 0;
+}
+
+// Remove the characters selected by iscntrlx from `str` (via omitPattern).
+ustring omitCtrlX (const ustring& str) {
+ return omitPattern (str, iscntrlx);
+}
+
+// Predicate: non-zero when `c` is the NUL character.
+static int isNUL (int c) {
+ return c == 0;
+}
+
+// Remove every NUL character from `str` (via omitPattern).
+ustring omitNul (const ustring& str) {
+ return omitPattern (str, isNUL);
+}
+
+// Predicate: non-zero when `c` is a line feed (0x0a) or a carriage return (0x0d).
+static int iscrlfchar (int c) {
+ return c == 0x0a || c == 0x0d;
}
ustring omitNL (const ustring& str) {
- return omitPattern (str, re_nl);
+ return omitPattern (str, iscrlfchar);
+}
+
+// Predicate: non-zero when `c` lies outside the printable ASCII range 0x20..0x7e
+// (space is treated as printable here).
+static int isnonasciichar (int c) {
+ return c < 0x20 || c > 0x7e;
}
ustring omitNonAscii (const ustring& str) {
- static uregex re ("[^ -\\x7e]+");
- return omitPattern (str, re);
+ return omitPattern (str, isnonasciichar);
+}
+
+// Predicate: non-zero when `c` lies outside 0x21..0x7e, i.e. like isnonasciichar
+// but with the space character rejected as well.
+static int isnonasciiword (int c) {
+ return c < 0x21 || c > 0x7e;
}
ustring omitNonAsciiWord (const ustring& str) {
- static uregex re ("[^\\x21-\\x7e]+");
- return omitPattern (str, re);
+ return omitPattern (str, isnonasciiword);
}
-bool to_bool (const ustring& v) {
- if (v.length () == 0 || (v.length () == 1 && v[0] == '0')) {
- return false;
- } else {
- return true;
+// Shared percent-encoding loop driven by a splitter that locates the characters to escape.
+// For every separator reported by sp.nextSep(): the text before it (sp.pre()) is copied
+// through, then the first character of the match is replaced -- uUScore for NUL,
+// percentHEX(c) for anything else. The text remaining after the last separator is
+// appended at the end.
+// NOTE(review): uUScore is defined outside this chunk; presumably "_" -- confirm.
+static ustring percentEncode (Splitter& sp) {
+ ustring ans;
+ int c;
+ while (sp.nextSep ()) {
+ if (sp.preSize () > 0)
+ ans.append (sp.pre ());
+ c = *sp.matchBegin ();
+ if (c == '\0') {
+ ans.append (uUScore);
+ } else {
+ ans.append (percentHEX (c));
+ }
}
+ if (sp.preSize () > 0)
+ ans.append (sp.pre ());
+ return ans;
}
-static ustring percentEncode (const ustring& text, uregex& re) {
- /* $1 -> _
- $2 -> %HEX
- */
+// Finder used by percentEncode(): advance `b` to the next character that needs escaping.
+// A character needs escaping when it is outside 0..127 or flagged in the table, i.e. it is
+// anything other than A-Z, a-z, 0-9, '_', '.', '~' and '-'.
+// On success `b` points at that character, `u` just past it, and true is returned.
+// Otherwise `b` ends at `e`, `u` is set to `e`, and false is returned.
+static bool findPercentChar (uiterator& b, uiterator e, uiterator& u) {
+ // One entry per ASCII code, 16 per row; 1 = must be escaped.
+ static char table_percentchar[] = { // (\x00)|([^A-Za-z0-9_.~\-])
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
+ };
+ int c;
+ for (; b < e; ++ b) {
+ c = *b;
+ // The range test comes first so the table is never indexed out of bounds.
+ if (c < 0 || c >= 128 || table_percentchar[c]) {
+ u = b + 1;
+ return true;
+ }
+ }
+ u = e;
+ return false;
+}
+
+// Percent-encode the range [b, e): characters selected by findPercentChar are escaped
+// by the Splitter-based percentEncode overload. The commented-out regex below is the
+// pattern the table-driven finder replaces.
+ustring percentEncode (uiterator b, uiterator e) {
+// static uregex re ("(\\x00)|([^A-Za-z0-9_.~-])");
+ SplitterFn sp (b, e, findPercentChar);
+ return percentEncode (sp);
+}
+
+// Finder used by percentEncode_path(): same contract as findPercentChar, except that
+// '/' is also left unescaped. Unescaped set: A-Z, a-z, 0-9, '_', '/', '.', '~', '-'.
+// On success `b` points at the character to escape and `u` just past it; otherwise
+// `b` ends at `e`, `u` is set to `e`, and false is returned.
+static bool findPercentPathChar (uiterator& b, uiterator e, uiterator& u) {
+ // One entry per ASCII code, 16 per row; 1 = must be escaped.
+ static char table_percentpathchar[] = { // (\x00)|([^A-Za-z0-9_\/.~\-])
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
+ };
+ int c;
+ for (; b < e; ++ b) {
+ c = *b;
+ // The range test comes first so the table is never indexed out of bounds.
+ if (c < 0 || c >= 128 || table_percentpathchar[c]) {
+ u = b + 1;
+ return true;
+ }
+ }
+ u = e;
+ return false;
+}
+
+// Percent-encode the range [b, e) as a path: like percentEncode(), but '/' is kept
+// as-is because findPercentPathChar does not select it.
+ustring percentEncode_path (uiterator b, uiterator e) {
+// static uregex re ("(\\x00)|([^A-Za-z0-9_/.~-])");
+ SplitterFn sp (b, e, findPercentPathChar);
+ return percentEncode (sp);
+}
+
+ustring percentDecode (const ustring& str) {
+ ustring ans;
+ static uregex re ("%([0-9a-fA-F][0-9a-fA-F])|\\x00");
umatch m;
uiterator b, e;
- ustring ans;
- b = text.begin ();
- e = text.end ();
- if (b != e && usearch (b, e, m, re)) {
+ b = str.begin ();
+ e = str.end ();
+ while (usearch (b, e, m, re)) {
if (b != m[0].first) {
- ans.append (ustring (b, m[0].first));
+ ans.append (b, m[0].first);
}
if (m[1].matched) {
- ans.append (uUScore);
- } else if (m[2].matched) {
- ans.append (percentHex (*m[2].first));
+ int v = hex (*(m[1].first), *(m[1].first + 1));
+ if (v != 0)
+ ans.append (1, v);
} else {
- assert (0);
}
b = m[0].second;
- while (b != e && usearch (b, e, m, re)) {
- if (b != m[0].first) {
- ans.append (ustring (b, m[0].first));
- }
- if (m[1].matched) {
- ans.append (uUScore);
- } else if (m[2].matched) {
- ans.append (percentHex (*m[2].first));
- } else {
- assert (0);
- }
- b = m[0].second;
- }
- if (b != e) {
- ans.append (ustring (b, e));
- }
- return ans;
- } else {
- return text;
}
+ if (b != e) {
+ ans.append (b, e);
+ }
+
+ return fixUTF8 (ans);
}
-ustring urlencode (const ustring& url) {
- static uregex re ("(\\x00)|([^a-zA-Z0-9_.,/-])");
-
- return percentEncode (url, re);
+// Finder used by cookieencode(): advance `b` to the next character that must be escaped
+// in a cookie value -- ASCII control characters (0x00-0x1f, 0x7f), space, ',', ';', '%',
+// and every byte outside 0..127.
+// On success `b` points at that character and `u` just past it; otherwise `b` ends at
+// `e`, `u` is set to `e`, and false is returned.
+static bool findCookieEncChar (uiterator& b, uiterator e, uiterator& u) {
+ // One entry per ASCII code, 16 per row; 1 = must be escaped.
+ static char table_cookieencode[] = { // ([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ };
+ int c;
+ for (; b < e; ++ b) {
+ c = *b;
+ // The range test comes first so the table is never indexed out of bounds.
+ if (c < 0 || c >= 128 || table_cookieencode[c]) {
+ u = b + 1;
+ return true;
+ }
+ }
+ u = e;
+ return false;
}
ustring cookieencode (const ustring& text) {
- static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
-
- return percentEncode (text, re);
+// static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
+ SplitterFn sp (text.begin (), text.end (), findCookieEncChar);
+ return percentEncode (sp);
}
ustring cookiedecode (const ustring& text) {
return ans;
}
-ustring dirPart (char* path) {
- char* e = rindex (path, '/');
-
- if (e && e != path) {
- return ustring (path, e - path);
- } else {
- return uSlash;
- }
-}
-
ustring dirPart (const ustring& path) {
ustring::size_type s = path.rfind ('/', path.size ());
if (s == ustring::npos) {
- return uSlash;
+// return uSlash;
+ return uDot;
} else {
return ustring (path.begin (), path.begin () + s);
}
}
void split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
- Splitter sp (b, e, re);
+ SplitterRe sp (b, e, re);
while (sp.next ()) {
- ans.push_back (sp.cur ());
+ ans.push_back (sp.pre ());
+ }
+}
+
+// Split [b, e) on the single character `ch` and append the pieces to `ans`.
+// One element (sp.pre()) is pushed for each successful SplitterCh::next().
+// NOTE(review): how next() treats a trailing empty field is defined by SplitterCh,
+// which is outside this chunk; compare splitE() below.
+void split (uiterator b, uiterator e, int ch, std::vector<ustring>& ans) {
+ SplitterCh sp (b, e, ch);
+
+ while (sp.next ()) {
+ ans.push_back (sp.pre ());
+ }
+}
+
+// Split [b, e) on the regex `re` and append the pieces to `ans`.
+// Nothing is appended for an empty range. Otherwise one element is pushed per separator
+// found by nextSep(), plus one more for the text after the last separator -- presumably
+// so that empty fields, including a trailing one, are preserved (confirm against SplitterRe).
+void splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
+ SplitterRe sp (b, e, re);
+
+ if (b < e) {
+ while (sp.nextSep ()) {
+ ans.push_back (sp.pre ());
+ }
+ ans.push_back (sp.pre ());
+ }
+}
+
+// Split [b, e) on the single character `ch` and append the pieces to `ans`.
+// Nothing is appended for an empty range. Otherwise one element is pushed per separator
+// found by nextSep(), plus one more for the text after the last separator -- presumably
+// so that empty fields, including a trailing one, are preserved (confirm against SplitterCh).
+void splitE (uiterator b, uiterator e, int ch, std::vector<ustring>& ans) {
+ SplitterCh sp (b, e, ch);
+
+ if (b < e) {
+ while (sp.nextSep ()) {
+ ans.push_back (sp.pre ());
+ }
+ ans.push_back (sp.pre ());
}
}
return false;
}
-static char Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-ustring base64Encode (uiterator b, uiterator e) {
- ustring ans;
- size_t size;
- int c0, c1, c2;
-
- while (b != e) {
- size = e - b;
- if (size >= 3) {
- c0 = *b ++;
- c1 = *b ++;
- c2 = *b ++;
- ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
- ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
- ans.append (1, Base64Char[((c1 & 0x0f) << 2) | ((c2 >> 6) & 0x03)]);
- ans.append (1, Base64Char[c2 & 0x3f]);
- } else if (size == 2) {
- c0 = *b ++;
- c1 = *b ++;
- ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
- ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
- ans.append (1, Base64Char[((c1 & 0x0f) << 2)]);
- ans.append (1, '=');
- } else if (size == 1) {
- c0 = *b ++;
- ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
- ans.append (1, Base64Char[((c0 & 0x03) << 4)]);
- ans.append (1, '=');
- ans.append (1, '=');
- } else {
- break;
- }
- }
- return ans;
-}
-
ustring escape_re (const ustring& text) {
ustring::const_iterator b, e;
umatch m;
return (strcmp (crypt (pass.c_str (), cpass.c_str ()), cpass.c_str ()) == 0);
}
-ustring passCrypt (const ustring& pass) {
- ustring salt = makeSalt ();
+// Hash `pass` with crypt(), using a salt created by makeSalt() for the requested format.
+// The first makeSalt() argument is the digit passed for each format ('1' for FORMAT_MD5,
+// '5' for FORMAT_SHA256, '6' for FORMAT_SHA512); the second is 8 or 16.
+// NOTE(review): makeSalt() is defined outside this chunk; presumably the digit selects the
+// "$<digit>$" crypt scheme and the number is the salt length -- confirm.
+// NOTE(review): crypt() may return NULL on failure (see crypt(3)); constructing a ustring
+// from NULL is undefined. The result is not checked here.
+// NOTE(review): when assert() is compiled out, an unknown format falls through with an
+// empty salt.
+ustring passCrypt (const ustring& pass, passCryptFormat format) {
+ // XXX not thread safe.
+ ustring salt;
+ switch (format) {
+ case FORMAT_MD5:
+ salt = makeSalt ('1', 8);
+ break;
+// case FORMAT_BF:
+// salt = makeSalt ('2', 16);
+// break;
+ case FORMAT_SHA256:
+ salt = makeSalt ('5', 16);
+ break;
+ case FORMAT_SHA512:
+ salt = makeSalt ('6', 16);
+ break;
+ default:
+ assert (0);
+ }
return ustring (crypt (pass.c_str (), salt.c_str ()));
}
}
}
-ustring utf16Encode (const ustring& str) {
+// Lookup table indexed by ASCII code (0..127): 1 marks characters that jsEncode() copies
+// through literally instead of escaping them. Marked: space, '-', '.', '/', digits,
+// ':', ';', '=', A-Z, '_' and a-z.
+static bool jssafe[] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0--15
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 16--31
+ 1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1, // 32--47
+ 1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0, // 48--63
+ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 64--79
+ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, // 80--95
+ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 96--111
+ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, // 112--127
+};
+
+ustring jsEncode (const ustring& str) {
int i;
ustring u, ans;
- int c;
+ int c, d;
char b[8];
u = utf8to16 (str);
b[1] = 'u';
for (i = 0; i < u.size (); i += 2) {
c = u[i];
- b[2] = hexchar ((c >> 4) & 0x0f);
- b[3] = hexchar (c & 0x0f);
- c = u[i + 1];
- b[4] = hexchar ((c >> 4) & 0x0f);
- b[5] = hexchar (c & 0x0f);
- ans.append (b, 6);
+ d = u[i + 1];
+ if (c == 0 && 0 < d && d < 127 && jssafe[d]) {
+ ans.append (1, d);
+ } else {
+ b[2] = hexchar ((c >> 4) & 0x0f);
+ b[3] = hexchar (c & 0x0f);
+ b[4] = hexchar ((d >> 4) & 0x0f);
+ b[5] = hexchar (d & 0x0f);
+ ans.append (b, 6);
+ }
}
return ans;
}
ustring filenameEncode (const ustring& text) {
static uregex re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
- Splitter sp (text, re);
+ SplitterRe sp (text, re);
ustring ans;
int c;
return ans;
}
+// Decode ":XY" escapes (colon followed by two hexadecimal digits) found in `text`.
+// Text between escapes is copied unchanged. An escape is replaced by the byte it encodes
+// only when the value is in 32..255; escapes for smaller values are dropped.
+// NOTE(review): whether SplitterRe::next() also yields the text after the last escape is
+// defined outside this chunk -- confirm the tail is not lost.
+ustring filenameDecode (const ustring& text) {
+ static uregex re (":([0-9a-fA-F][0-9a-fA-F])");
+ SplitterRe sp (text, re);
+ ustring ans;
+ int c;
+
+ ans.reserve (text.length ());
+ while (sp.next ()) {
+ if (sp.begin () < sp.end ())
+ ans.append (sp.begin (), sp.end ());
+ if (sp.match (1)) {
+ // Combine the two captured hex digits into one byte value.
+ c = hex (*(sp.matchBegin (1))) * 16 + hex (*(sp.matchBegin (1) + 1));
+ if (32 <= c && c < 256)
+ ans.append (1, c);
+ }
+ }
+ return ans;
+}
+
bool matchSkip (uiterator& b, uiterator e, const char* t, size_t s) {
if (e - b >= s && memcmp (t, &b[0], s) == 0) {
b += s;
}
}
-bool wsearch (const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
- try {
- std::wstring wtext = utow (text);
- std::wstring wreg = utow (reg);
- boost::wregex wre (wreg, reg_flags);
- return regex_search (wtext, m, wre, search_flags);
- } catch (boost::regex_error& err) {
- throw (uErrorRegexp);
- }
-}
-
-bool wsearch_env (MlEnv* mlenv, const ustring& text, boost::wsmatch& m, const ustring& reg, boost::wregex::flag_type reg_flags, boost::match_flag_type search_flags) {
- try {
- mlenv->env->regtext = utow (text);
- std::wstring wreg = utow (reg);
- boost::wregex wre (wreg, reg_flags);
- return regex_search (mlenv->env->regtext, m, wre, search_flags);
- } catch (boost::regex_error& err) {
- throw (uErrorRegexp);
- }
-}
-
-ustring uiconv (const ustring& src, const char* tocode, const char* fromcode) {
- iconv_t cd;
- char buf[4096];
- const char* ibuf;
- char* obuf;
- size_t isize, osize, rsize;
- ustring ans;
-
- cd = iconv_open (tocode, fromcode);
- if (cd == (iconv_t)(-1))
- throw (ustring ("bad encoding name."));
- ibuf = &src.at (0);
- isize = src.size ();
- while (isize > 0) {
- obuf = buf;
- osize = 4096;
- rsize = iconv (cd, &ibuf, &isize, &obuf, &osize);
-// if (rsize < 0)
- if (obuf - buf <= 0)
- break;
- ans.append (buf, obuf - buf);
- }
- iconv_close (cd);
- return ans;
-}
-
ustring padEmpty (const ustring& name) {
if (name.empty ())
return ustring (CharConst ("(null)"));
return ans;
}
+// Parse the leading digits of [b, e) as a non-negative number written in `base`
+// and return it as a double.
+//
+//   b, e : character range to read.
+//   base : radix; digits are 0-9 and a-f/A-F, so values up to 16 are meaningful.
+//
+// Parsing stops at the first character that is not a valid digit for `base`; the
+// value accumulated so far is returned (0.0 when there is no leading digit).
+double hextod (uiterator b, uiterator e, int base) {
+    double ans = 0.0;
+
+    for (; b < e; ++ b) {
+        int c = shex (*b);
+        if (c < 0 || c >= base)
+            break;
+        // Scale by the requested radix; a fixed 16 is only right for base 16.
+        ans = ans * base + c;
+    }
+    return ans;
+}
+
+// Format the integer part of a non-negative double as digits in `base`.
+//
+//   e      : value to format; the fractional part is discarded with floor().
+//   pad    : minimum number of digits; the result is left-padded with '0' (capped at 128).
+//   base   : radix, 2..16.
+//   upcase : use 'A'..'F' instead of 'a'..'f' for digits above 9.
+//
+// Returns an empty string for negative, NaN or infinite values and for a radix outside
+// 2..16 (the digit tables only hold 16 entries). At most 128 digits are produced;
+// higher-order digits beyond that are silently dropped.
+// NOTE(review): a value of 0 with pad <= 0 yields an empty string, not "0" -- kept as is
+// because callers may rely on it; confirm whether that is intended.
+ustring dtohex (double e, int pad, int base, bool upcase) {
+    static const char xdigsLower[] = "0123456789abcdef";
+    static const char xdigsUpper[] = "0123456789ABCDEF";
+    const int bufSize = 128;
+    char d[bufSize];
+    int pos = bufSize;
+    ustring ans;
+
+    // A radix above 16 would index past the digit tables; 0 or less breaks the division.
+    if (base < 2 || base > 16)
+        return ans;
+
+    const char* digs = upcase ? xdigsUpper : xdigsLower;
+    double b = base;
+
+    // The upper bound rejects +infinity, for which the remainder below would be NaN.
+    if (e >= 0 && e <= DBL_MAX) {
+        e = floor (e);
+        // Digits are produced least-significant first and stored from the buffer's end.
+        while (pos > 0 && e > 0) {
+            double a = floor (e / b);
+            int r = e - a * b;
+            e = a;
+            // Clamp against floating-point rounding on very large values.
+            if (r < 0) {
+                r = 0;
+            } else if (r >= base) {
+                r = base - 1;
+            }
+            d[--pos] = digs[r];
+        }
+        if (pad > 0) {
+            for (int i = bufSize - pos; i < pad && i < bufSize; i ++) {
+                d[--pos] = '0';
+            }
+        }
+        ans.assign (d + pos, bufSize - pos);
+    } else {
+        /* negative, NaN and infinite values are not supported: empty result */
+    }
+    return ans;
+}
+
ustring toCRLF (const ustring& str) {
uiterator b = str.begin ();
uiterator e = str.end ();
- umatch m;
+ uiterator p;
ustring ans;
- while (usearch (b, e, m, re_lf)) {
- ans.append (b, m[0].first).append (uCRLF);
- b = m[0].second;
+ p = b;
+ while (findChar (b, e, '\n')) {
+ ans.append (p, b).append (uCRLF);
+ p = ++ b;
}
- ans.append (b, e);
+ if (p < e)
+ ans.append (p, e);
return ans;
}
-void skipSpace (uiterator& b, uiterator e) {
- while (b < e && *b == ' ') {
- b ++;
+// Advance `b` past a run of consecutive `ch` characters, stopping at `e`.
+void skipChar (uiterator& b, uiterator e, int ch) {
+ while (b < e && *b == ch)
+ ++ b;
+}
+
+// Advance `b` to just after the first occurrence of `ch`, or to `e` when `ch`
+// does not occur in [b, e).
+void skipNextToChar (uiterator& b, uiterator e, int ch) {
+ while (b < e) {
+ // The post-increment consumes the character whether or not it matches.
+ if (*(b ++) == ch)
+ return;
}
}
}
}
-#if 0
-void toLower (ustring::iterator* b, ustring::iterator* e) {
- transform (*b, *e, *b, toLower_ustring_value);
-}
-#endif
-
ustring toLower (uiterator b, uiterator e) {
ustring::iterator i;
ustring ans;
return ans;
}
-static void format_hex (ustring& ans, MNode* a, std::vector<ustring>& par, bool fcap) {
- uint32_t v = 0;
- char buf[32];
-
- if (a)
- v = to_int (a);
+static ustring colpad0 (int n, const ustring& src) {
+ int m;
- if (par.size () > 0) {
- int p = strtol (par[0]);
- if (p < 0)
- p = 1;
- if (p > 20)
- p = 20;
- if (fcap)
- ans.append (buf, snprintf (buf, 32, "%.*X", p, v));
- else
- ans.append (buf, snprintf (buf, 32, "%.*x", p, v));
+ if (n > 0) {
+ n = std::min (32, n);
+ m = n - src.length ();
+ if (m > 0) {
+ ustring ans;
+ ans.reserve (n);
+ ans.append (m, '0');
+ ans.append (src);
+ return ans;
+ } else if (m == 0) {
+ return src;
+ } else {
+ return ustring (src.end () - n, src.end ());
+ }
} else {
- if (fcap)
- ans.append (buf, snprintf (buf, 32, "%X", v));
- else
- ans.append (buf, snprintf (buf, 32, "%x", v));
+ return src;
}
}
-static void format_hex (ustring& ans, MNode* a, std::vector<ustring>& par) {
- format_hex (ans, a, par, false);
-}
-
-static void format_HEX (ustring& ans, MNode* a, std::vector<ustring>& par) {
- format_hex (ans, a, par, true);
-}
-
-static void format_int_sub (ustring& ans, MNode* a, std::vector<ustring>& par, bool pad0 = false) {
- int32_t v = 0;
- char buf[32];
- size_t s;
-
- if (a)
- v = to_int (a);
+/*
+ ${Y:4}, ${Y:2}
+ ${M:2}, ${M}, ${M:name}, ${M:ab}
+ ${D:2}, ${D}
+ ${h:2}, ${h}
+ ${m:2}, ${m}
+ ${s:2}, ${s}
+ ${W}, ${w}
+ ${o}
+*/
+ustring formatDateString (const ustring& format, struct tm& v) {
+ ustring ans;
+ uiterator b, e;
+ umatch m;
+ int pc;
+ static uregex re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}");
+ std::vector<ustring> fpar;
- if (par.size () > 0) {
- bool fclip = false;
- bool fzero = pad0;
- bool fc3 = false;
- if (match (par[0], CharConst ("comma")) || match (par[0], CharConst ("c"))) {
- ans.append (c3 (to_ustring (v)));
+ b = format.begin ();
+ e = format.end ();
+ while (usearch (b, e, m, re)) {
+ ans.append (b, m[0].first);
+ b = m[0].second;
+ if (m[5].matched) {
+ if (m[6].matched) { // name
+ ans.append (MStr[v.tm_mon]);
+ } else if (m[7].matched || m[8].matched) { // abname
+ ans.append (MStr_a[v.tm_mon]);
+ }
} else {
- int p = strtol (par[0]);
- if (p < 0)
- p = 1;
- if (p > 20)
- p = 20;
- for (int i = 1; i < par.size (); i ++) {
- if (match (par[i], CharConst ("clip"))) {
- fclip = true;
- } else if (match (par[i], CharConst ("0"))) {
- fzero = true;
- } else if (match (par[i], CharConst ("comma")) || match (par[i], CharConst ("c"))) {
- fc3 = true;
- } else {
- throw (par[i] + uErrorBadParam);
+ if (m[3].matched) {
+ pc = strtol (ustring (m[4].first, m[4].second));
+ } else {
+ pc = 0;
+ }
+ switch (*m[2].first) {
+ case 'Y':
+ ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900)));
+ break;
+ case 'M':
+ ans.append (colpad0 (pc, to_ustring (v.tm_mon + 1)));
+ break;
+ case 'D':
+ ans.append (colpad0 (pc, to_ustring (v.tm_mday)));
+ break;
+ case 'h':
+ ans.append (colpad0 (pc, to_ustring (v.tm_hour)));
+ break;
+ case 'm':
+ ans.append (colpad0 (pc, to_ustring (v.tm_min)));
+ break;
+ case 's':
+ ans.append (colpad0 (pc, to_ustring (v.tm_sec)));
+ break;
+ case 'W':
+ ans.append (WStr [v.tm_wday]);
+ break;
+ case 'w':
+ ans.append (WStr_a [v.tm_wday]);
+ break;
+ case 'o':
+ {
+ int h, m;
+ if (v.tm_gmtoff < 0) {
+ h = - v.tm_gmtoff / 60;
+ m = h % 60;
+ h = h / 60;
+ ans.append (CharConst ("-")).append (colpad0 (4, to_ustring (h * 100 + m)));
+ } else {
+ h = v.tm_gmtoff / 60;
+ m = h % 60;
+ h = h / 60;
+ ans.append (CharConst ("+")).append (colpad0 (4, to_ustring (h * 100 + m)));
+ }
}
+ break;
}
- if (fzero)
- s = snprintf (buf, 32, "%.*ld", p, v);
- else
- s = snprintf (buf, 32, "%*ld", p, v);
- if (fclip && s > p)
- ans.append (buf + s - p, p);
- else if (! fclip && fc3)
- ans.append (c3 (ustring (buf, s)));
- else
- ans.append (buf, s);
}
- } else {
- ans.append (to_ustring (v));
}
-}
+ ans.append (b, e);
-static void format_int (ustring& ans, MNode* a, std::vector<ustring>& par) {
- format_int_sub (ans, a, par);
+ return ans;
}
-static void format_int0 (ustring& ans, MNode* a, std::vector<ustring>& par) {
- format_int_sub (ans, a, par, true);
+// Return a lower-cased copy of `str` using boost::to_lower_copy.
+// NOTE(review): the Boost call is locale-dependent; multi-byte UTF-8 sequences are
+// presumably processed byte by byte -- confirm this is acceptable for callers.
+ustring toLower (const ustring& str) {
+ return boost::to_lower_copy (str);
}
-static void format_int (ustring& ans, MNode* a, int c, bool pad0 = false) {
- int32_t v = 0;
- char buf[32];
- size_t s;
+// Return an upper-cased copy of `str` using boost::to_upper_copy.
+// NOTE(review): the Boost call is locale-dependent; multi-byte UTF-8 sequences are
+// presumably processed byte by byte -- confirm this is acceptable for callers.
+ustring toUpper (const ustring& str) {
+ return boost::to_upper_copy (str);
+}
- if (a)
- v = to_int (a);
+ustring hexEncode (const ustring& data, bool upcase) {
+ ustring ans;
+ uiterator b, e;
+ char (*fn) (int);
- if (c > 0) {
- if (c > 20)
- c = 20;
- if (pad0)
- s = snprintf (buf, 32, "%.*ld", c, v);
- else
- s = snprintf (buf, 32, "%*ld", c, v);
- if (s > c)
- ans.append (buf + s - c, c);
- else
- ans.append (buf, s);
- } else {
- ans.append (to_ustring (v));
- }
-}
-
-static void format_float (ustring& ans, MNode* a, std::vector<ustring>& par) {
- int p1 = 0;
- int p2 = 0;
- char buf[32];
-
- if (par.size () > 0)
- p1 = strtol (par[0]);
- if (par.size () > 1)
- p2 = strtol (par[1]);
- if (p1 < 0)
- p1 = 0;
- if (p2 < 0)
- p2 = 0;
- if (p1 > 20)
- p1 = 20;
- if (p2 > 20)
- p2 = 20;
- ans.append (buf, snprintf (buf, 32, "%*.*lf", p1, p2, to_double (a)));
-}
-
-static void format_string (ustring& ans, MNode* a, std::vector<ustring>& par) {
- int p = 0;
- bool fright = false;
- ustring u = to_string (a);
-
- if (par.size () > 0)
- p = strtol (par[0]);
- if (p > 65536)
- p = 65536;
- if (par.size () > 1) {
- if (match (par[1], CharConst ("right")) || match (par[1], CharConst ("r")))
- fright = true;
- else
- throw (par[1] + uErrorBadParam);
- }
- if (fright) {
- if (u.size () < p)
- ans.append (p - u.size (), ' ').append (u);
- else
- ans.append (u);
- } else {
- if (u.size () < p)
- ans.append (u).append (p - u.size (), ' ');
- else
- ans.append (u);
+ if (upcase)
+ fn = hexchar_c;
+ else
+ fn = hexchar;
+ ans.reserve (data.length () * 2);
+ b = data.begin ();
+ e = data.end ();
+ for (; b < e; b ++) {
+ ans.append (1, fn ((*b >> 4) & 0x0f));
+ ans.append (1, fn (*b & 0x0f));
}
+ return ans;
}
-static void format_literal (ustring& ans, MNode* a, const char* list[], int offset, size_t size) {
- int v;
+ustring hexDecode (const ustring& data) {
+ ustring ans;
+ uiterator b, e;
+ int c;
- if (a) {
- v = to_int (a) - offset;
- if (0 <= v && v < size)
- ans.append (list[v]);
+ ans.reserve (data.length () / 2);
+ b = data.begin ();
+ e = data.end ();
+ for (; b < e; b ++) {
+ c = *b ++;
+ if (b < e) {
+ ans.append (1, hex (c, *b));
+ }
}
+ return ans;
}
-static void format_month (ustring& ans, MNode* a, std::vector<ustring>& par) {
- static const char* mstr_a[] = {
- "Jan", "Feb", "Mar", "Apr",
- "May", "Jun", "Jul", "Aug",
- "Sep", "Oct", "Nov", "Dec"
- };
- format_literal (ans, a, mstr_a, 1, 12);
+// Decode three consecutive octal digit characters starting at `b` into their value.
+// No validation is done: the caller must guarantee that three characters are readable
+// and that each one is in '0'..'7'.
+int octchar (uiterator b) { // 3bytes
+ int ans = 0;
+ ans = *b - '0';
+ ++ b;
+ ans = ans * 8 + *b - '0';
+ ++ b;
+ ans = ans * 8 + *b - '0';
+ return ans;
}
-static void format_Month (ustring& ans, MNode* a, std::vector<ustring>& par) {
- static const char* mstr[] = {
- "January", "February", "March", "April",
- "May", "June", "July", "August",
- "September", "October", "November", "December"
- };
- format_literal (ans, a, mstr, 1, 12);
+// Encode the low 8 bits of `c` as exactly three octal digit characters.
+ustring octchar (int c) {
+ ustring ans (3, 0);
+ // Fill from the least significant digit backwards.
+ ans[2] = (c & 0x7) + '0';
+ c >>= 3;
+ ans[1] = (c & 0x7) + '0';
+ c >>= 3;
+ // Only two bits remain for the leading digit of an 8-bit value.
+ ans[0] = (c & 0x3) + '0';
+ return ans;
}
-static void format_week (ustring& ans, MNode* a, std::vector<ustring>& par) {
- static const char* wstr_a[] = {
- "Sun", "Mon", "Tue", "Wed",
- "Thu", "Fri", "Sat"
- };
- format_literal (ans, a, wstr_a, 0, 7);
+// Find the next line break in [b, e). "\n", "\r" and "\r\n" each count as one break.
+// On success `b` points at the first character of the break, `u` just past the whole
+// break, and true is returned. Otherwise `b` ends at `e`, `u` is set to `e`, and false
+// is returned.
+bool findNL (uiterator& b, uiterator e, uiterator& u) {
+ for (; b < e; ++ b) {
+ if (*b == '\n') {
+ u = b + 1;
+ return true;
+ } else if (*b == '\r') {
+ u = b + 1;
+ // Treat CR LF as a single break.
+ if (u < e && *u == '\n')
+ ++ u;
+ return true;
+ }
+ }
+ u = e;
+ return false;
}
-static void format_Week (ustring& ans, MNode* a, std::vector<ustring>& par) {
- static const char* wstr[] = {
- "Sunday", "Monday", "Tuesday", "Wednesday",
- "Thursday", "Friday", "Saturday"
- };
- format_literal (ans, a, wstr, 0, 7);
+// Advance `b` to just after the next line break in [b, e). "\n", "\r" and "\r\n" each
+// count as one break. Returns true when a break was found; otherwise `b` ends at `e`
+// and false is returned.
+bool findNLb (uiterator& b, uiterator e) {
+ for (; b < e; ++ b) {
+ if (*b == '\n') {
+ ++ b;
+ return true;
+ } else if (*b == '\r') {
+ ++ b;
+ // Treat CR LF as a single break.
+ if (b < e && *b == '\n')
+ ++ b;
+ return true;
+ }
+ }
+ return false;
}
-ustring formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
- ustring ans;
- uiterator b, e;
- umatch m;
- u_int i;
- MNode* a;
- static uregex re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
- static struct {
- const char* name;
- size_t namelen;
- void (*fn)(ustring& ans, MNode* a, std::vector<ustring>& par);
- } formatFunc[] = {
- {CharConst ("hex"), format_hex},
- {CharConst ("HEX"), format_HEX},
- {CharConst ("int"), format_int},
- {CharConst ("int0"), format_int0},
- {CharConst ("float"), format_float},
- {CharConst ("string"), format_string},
- {CharConst ("month"), format_month},
- {CharConst ("Month"), format_Month},
- {CharConst ("week"), format_week},
- {CharConst ("Week"), format_Week},
- {NULL, 0, NULL}
- };
-
- b = format.begin ();
- e = format.end ();
- while (usearch (b, e, m, re)) {
- ans.append (b, m[0].first);
- b = m[0].second;
- i = strtoul (ustring (m[1].first, m[1].second)) - 1;
- if (i < par.size ()) {
- a = par[i] ();
- } else {
- a = NULL;
+bool findChar (uiterator& b, uiterator e, int ch) {
+ for (; b < e; ++ b) {
+ if (*b == ch) {
+ return true;
}
- if (! m[2].matched) {
- if (a)
- ans.append (to_string (a));
- } else {
- std::vector<ustring> fpar;
- int i;
- if (m[4].matched)
- split (m[5].first, m[5].second, re_colon, fpar);
- for (i = 0; formatFunc[i].name; i ++) {
- if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) {
- (*formatFunc[i].fn) (ans, a, fpar);
- goto Bp1;
- }
- }
- ans.append (m[0].first, m[0].second);
- Bp1:;
+ }
+ return false;
+}
+
+bool findChars (uiterator& b, uiterator e, const ustring& pattern) {
+ for (; b < e; ++ b) {
+ if (pattern.find (*b) != ustring::npos) {
+ return true;
}
}
- ans.append (b, e);
+ return false;
+}
- return ans;
+// Advance `b` to the first character in [b, e) for which `fn` returns true.
+// Returns true with `b` on that character, or false with `b` at `e` when none matches.
+bool findCharFn (uiterator& b, uiterator e, bool (*fn)(int)) {
+ for (; b < e; ++ b) {
+ if (fn (*b))
+ return true;
+ }
+ return false;
}
-/*
- ${Y:4}, ${Y:2}
- ${M:2}, ${M}
- ${D:2}, ${D}
- ${h:2}, ${h}
- ${m:2}, ${m}
- ${s:2}, ${s}
- ${W}, ${w}
-*/
-ustring formatDateString (const ustring& format, boost::ptr_vector<MNodePtr>& par) {
- ustring ans;
- uiterator b, e;
- umatch m;
- u_int i;
- MNode* a;
- static uregex re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
+// Find the next ';' separator in [b, e) together with the spaces around it.
+// On success `b` is moved back to the start of the spaces preceding the ';' (never
+// before its initial position), `u` is moved past the spaces following it, and true
+// is returned. Otherwise `b` ends at `e`, `u` is set to `e`, and false is returned.
+// NOTE(review): despite the name, the character searched for is ';', not ':'.
+bool findSepColon (uiterator& b, uiterator e, uiterator& u) {
+ // Search for " *; *". b is advanced.
+ uiterator p = b;
+ if (findChar (b, e, ';')) {
+ u = b + 1;
+ while (p < b && *(b - 1) == ' ')
+ -- b;
+ while (u < e && *u == ' ')
+ ++ u;
+ return true;
+ }
+ u = e;
+ return false;
+}
- b = format.begin ();
- e = format.end ();
- while (usearch (b, e, m, re)) {
- std::vector<ustring> fpar;
- ans.append (b, m[0].first);
- b = m[0].second;
- switch (*m[1].first) {
- case 'Y':
- a = par[0] ();
- break;
- case 'M':
- a = par[1] ();
- break;
- case 'D':
- a = par[2] ();
- break;
- case 'h':
- a = par[3] ();
- break;
- case 'm':
- a = par[4] ();
- break;
- case 's':
- a = par[5] ();
- break;
- case 'W':
- case 'w':
- a = par[6] ();
- break;
- default:
- a = NULL;
- }
+// Consume the leading run of characters in [b, e) that satisfy `fn`.
+// Returns true and leaves `b` after the run when at least one character matched;
+// returns false and leaves `b` untouched otherwise.
+bool matchHeadFn (uiterator& b, uiterator e, bool (*fn)(int)) {
+ if (b < e && fn (*b)) {
+ do {
+ ++ b;
+ } while (b < e && fn (*b));
+ return true;
+ }
+ return false;
+}
- if (! m[2].matched) {
- switch (*m[1].first) {
- case 'W':
- format_Week (ans, a, fpar);
- break;
- case 'w':
- format_week (ans, a, fpar);
- break;
- default:
- if (a)
- ans.append (to_string (a));
+// True when [b, e) is non-empty and every character in it is an ASCII code (0..127)
+// whose entry in `tbl` is non-zero. `tbl` is indexed by character code, so it must
+// hold at least 128 entries.
+bool matchWordTbl (uiterator b, uiterator e, char* tbl) {
+ int c;
+ if (b < e) {
+ do {
+ c = *b;
+ if (0 <= c && c < 128 && tbl[c]) { // codes 128 and above are treated as false
+ } else {
+ return false;
}
- } else {
- format_int (ans, a, strtol (ustring (m[3].first, m[3].second)), true);
- }
+ ++ b;
+ } while (b < e);
+ return true;
+ } else {
+ return false;
}
- ans.append (b, e);
-
- return ans;
}
+// True when [b, e) is non-empty and every character in it is an ASCII code (0..127)
+// accepted by `fn`. Characters outside 0..127 are rejected without calling `fn`.
+bool matchWordFn (uiterator b, uiterator e, bool (*fn)(int)) {
+ int c;
+ if (b < e) {
+ do {
+ c = *b;
+ if (0 <= c && c < 128 && fn (c)) {
+ } else {
+ return false;
+ }
+ ++ b;
+ } while (b < e);
+ return true;
+ } else {
+ return false;
+ }
+}