From c1bd5fbc2540821b5d18a1edfe1acede0c148874 Mon Sep 17 00:00:00 2001 From: visor Date: Thu, 23 Sep 2010 00:40:15 +0900 Subject: [PATCH] fix string utils. --- ext/ml-sqlite3.cc | 1 + lib/http.cc | 4 +- lib/iso2022jp.cc | 1 + lib/ml.cc | 1 + lib/motor.cc | 3 +- lib/utf8.cc | 3 +- lib/util_apache.cc | 1 + lib/util_base64.cc | 89 ++++++++++++++++++++++++++++++ lib/util_base64.h | 9 +++ lib/util_splitter.h | 121 ++++++++++++++++++++++++++++++++++++++++ lib/util_string.cc | 37 +------------ lib/util_string.h | 150 -------------------------------------------------- lib/util_wsplitter.h | 40 ++++++++++++++ ml/Makefile | 1 + modules/ml-formvar.cc | 3 +- modules/ml-http.cc | 25 +++++---- modules/ml-string.cc | 1 + wiki/wikiformat.h | 1 + wiki/wikimotor.h | 1 + 19 files changed, 292 insertions(+), 200 deletions(-) create mode 100644 lib/util_base64.cc create mode 100644 lib/util_base64.h create mode 100644 lib/util_splitter.h create mode 100644 lib/util_wsplitter.h diff --git a/ext/ml-sqlite3.cc b/ext/ml-sqlite3.cc index 2cc5fbe..91edb4a 100644 --- a/ext/ml-sqlite3.cc +++ b/ext/ml-sqlite3.cc @@ -7,6 +7,7 @@ #include "expr.h" #include "util_const.h" #include "util_file.h" +#include "util_splitter.h" #include "util_string.h" #include "utf8.h" #include diff --git a/lib/http.cc b/lib/http.cc index 57ccfb8..2568d69 100644 --- a/lib/http.cc +++ b/lib/http.cc @@ -4,9 +4,11 @@ #include "motoroutput.h" #include "util_string.h" #include "util_time.h" +#include "util_base64.h" #include "util_check.h" #include "util_const.h" #include "util_random.h" +#include "util_splitter.h" #include "util_tcp.h" #include "ustring.h" #include "utf8.h" @@ -337,7 +339,7 @@ ustring HTTPSend::query () { idpw.assign (id).append (uColon).append (pw); q.append (CharConst ("Authorization: Basic ")).append (base64Encode (idpw.begin (), idpw.end ())).append (uCRLF); } - if (proxyid.length () > 0) { + if (useproxy && proxyid.length () > 0) { ustring idpw; idpw.assign (proxyid).append (uColon).append (proxypw); 
q.append (CharConst ("Proxy-Authorization: Basic ")).append (base64Encode (idpw.begin (), idpw.end ())).append (uCRLF);
diff --git a/lib/iso2022jp.cc b/lib/iso2022jp.cc
index eba6f2b..3ca997a 100644
--- a/lib/iso2022jp.cc
+++ b/lib/iso2022jp.cc
@@ -1,4 +1,5 @@
 #include "iso2022jp.h"
+#include "util_base64.h"
 #include "util_string.h"
 #include "ustring.h"
 
diff --git a/lib/ml.cc b/lib/ml.cc
index d853287..fc3eb09 100644
--- a/lib/ml.cc
+++ b/lib/ml.cc
@@ -3,6 +3,7 @@
 #include "mlenv.h"
 #include "expr.h"
 #include "utf8.h"
+#include "utf16.h"
 #include "ustring.h"
 #include "util_string.h"
 #include "util_const.h"
diff --git a/lib/motor.cc b/lib/motor.cc
index 658b559..67467a4 100644
--- a/lib/motor.cc
+++ b/lib/motor.cc
@@ -4,8 +4,9 @@
 #include "mlenv.h"
 #include "expr.h"
 #include "util_const.h"
-#include "util_string.h"
 #include "util_file.h"
+#include "util_splitter.h"
+#include "util_string.h"
 #include "ustring.h"
 #include "mftable.h"
 #include "motorconst.h"
diff --git a/lib/utf8.cc b/lib/utf8.cc
index 586ad1e..423adba 100644
--- a/lib/utf8.cc
+++ b/lib/utf8.cc
@@ -15,7 +15,8 @@ ustring fixUTF8 (const ustring& str) {
     ans.reserve (n);
     for (i = 0; i < n;) {
         c = str[i ++];
-        if (c <= 0x7f) {
+        if (c == 0) {
+        } else if (c <= 0x7f) {
             if (c == '\r') {
                 ans.append (1, '\n');
                 if (i < n && str[i] == '\n') {
diff --git a/lib/util_apache.cc b/lib/util_apache.cc
index 8f5cc0e..9d1307e 100644
--- a/lib/util_apache.cc
+++ b/lib/util_apache.cc
@@ -1,6 +1,7 @@
 #include "util_apache.h"
 #include "util_const.h"
 #include "util_file.h"
+#include "util_splitter.h"
 #include "util_string.h"
 #include "httpconst.h"
 #include "ustring.h"
diff --git a/lib/util_base64.cc b/lib/util_base64.cc
new file mode 100644
index 0000000..59fedf9
--- /dev/null
+++ b/lib/util_base64.cc
@@ -0,0 +1,89 @@
+#include "util_base64.h"
+#include "ustring.h"
+#include <ctype.h>
+#include <string.h>
+
+// The base64 alphabet; the index of a character is its 6-bit value.
+static char Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/*
+  Encode the bytes in [b, e) as base64 text.
+  Every 3 input bytes become 4 output characters; a final group of
+  1 or 2 bytes is completed with "==" or "=" padding.
+*/
+ustring base64Encode (uiterator b, uiterator e) {
+    ustring ans;
+    size_t size;
+    int c0, c1, c2;
+
+    while (b != e) {
+        size = e - b;
+        if (size >= 3) {
+            c0 = *b ++;
+            c1 = *b ++;
+            c2 = *b ++;
+            ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
+            ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
+            ans.append (1, Base64Char[((c1 & 0x0f) << 2) | ((c2 >> 6) & 0x03)]);
+            ans.append (1, Base64Char[c2 & 0x3f]);
+        } else if (size == 2) {
+            c0 = *b ++;
+            c1 = *b ++;
+            ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
+            ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
+            ans.append (1, Base64Char[((c1 & 0x0f) << 2)]);
+            ans.append (1, '=');
+        } else if (size == 1) {
+            c0 = *b ++;
+            ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
+            ans.append (1, Base64Char[((c0 & 0x03) << 4)]);
+            ans.append (1, '=');
+            ans.append (1, '=');
+        } else {
+            break;
+        }
+    }
+    return ans;
+}
+
+/*
+  Decode the base64 text in [b, e) and return the raw bytes.
+  - Whitespace is skipped wherever it occurs, so line-wrapped input
+    whose breaks do not fall on a 4-character boundary decodes fully.
+  - Decoding stops at the first character outside the base64 alphabet;
+    this includes the '=' padding that ends a well-formed text.
+  - A trailing group of 2 or 3 characters without padding is decoded
+    to 1 or 2 bytes; a lone trailing character carries no full byte.
+*/
+ustring base64Decode (uiterator b, uiterator e) {
+    ustring ans;
+    const char* p;
+    u_int c;
+    // Bit buffer: the most recently read 6-bit value sits in the low bits.
+    u_int acc = 0;
+    // Number of bits in acc that have not been written to ans yet.
+    int nbits = 0;
+
+    for (; b != e; ++ b) {
+        // isspace () is undefined for negative arguments, so convert the
+        // character to unsigned char before widening it.
+        c = (unsigned char)*b;
+        if (isspace (c))
+            continue;
+        p = (const char*)memchr (Base64Char, c, sizeof (Base64Char) - 1);
+        // '=' padding or a foreign character: nothing more to decode.
+        if (p == NULL)
+            break;
+        // At most 6 bits are pending here, so 12 bits hold everything
+        // that is still needed after the new value is shifted in.
+        acc = ((acc << 6) | (u_int)(p - Base64Char)) & 0x0fff;
+        nbits += 6;
+        if (nbits >= 8) {
+            // A whole byte is available: emit the 8 oldest pending bits.
+            nbits -= 8;
+            ans.append (1, (char)((acc >> nbits) & 0xff));
+        }
+    }
+    return ans;
+}
+
diff --git a/lib/util_base64.h b/lib/util_base64.h
new file mode 100644
index 0000000..62267e6
--- /dev/null
+++ b/lib/util_base64.h
@@ -0,0 +1,9 @@
+#ifndef UTIL_BASE64_H
+#define UTIL_BASE64_H
+
+#include "ustring.h"
+
+ustring base64Encode
(uiterator b, uiterator e); +ustring base64Decode (uiterator b, uiterator e); + +#endif /* UTIL_BASE64_H */ diff --git a/lib/util_splitter.h b/lib/util_splitter.h new file mode 100644 index 0000000..cc3117d --- /dev/null +++ b/lib/util_splitter.h @@ -0,0 +1,121 @@ +#ifndef UTIL_SPLITTER_H +#define UTIL_SPLITTER_H + +#include "ustring.h" + +class Splitter { + public: + uregex* re; + uiterator b; + uiterator t; + uiterator u; + uiterator e; + umatch m; + + Splitter (const ustring& text, uregex& r) { + b = t = u = text.begin (); + e = text.end (); + re = &r; + }; + Splitter (uiterator pb, uiterator pe, uregex& r) { + b = t = u = pb; + e = pe; + re = &r; + }; + virtual ~Splitter () {}; + virtual void init (uiterator pb, uiterator pe) { + b = t = u = pb; + e = pe; + }; + virtual bool isEnd () { + return b == e; + }; + virtual bool next () { + b = u; + if (b != e) { + if (usearch (b, e, m, *re)) { + t = m[0].first; + u = m[0].second; + } else { + t = e; + u = e; + } + return true; + } else { + return false; + } + }; + virtual bool nextSep () { + b = u; + if (b != e) { + if (usearch (b, e, m, *re)) { + t = m[0].first; + u = m[0].second; + return true; + } else { + t = e; + u = e; + return false; + } + } else { + return false; + } + }; + virtual uiterator begin () { + return b; + }; + virtual uiterator end () { + return t; + }; + virtual ustring cur () { + return ustring (b, t); + }; + virtual bool match (int index) { + return (t != u && m[index].matched); + } + virtual uiterator matchBegin () { + return t; + }; + virtual uiterator matchBegin (int index) { + return m[index].first; + }; + virtual uiterator matchEnd () { + return u; + }; + virtual uiterator matchEnd (int index) { + return m[index].second; + }; + virtual uiterator eol () { + return e; + }; + virtual void rewind (int i) { + int n = u - t; + if (n > i) { + u -= i; + } else { + u -= n; + } + }; + virtual bool nextSearch () { + if (u != e) { + if (usearch (u, e, m, *re)) { + t = m[0].first; + u = m[0].second; + 
return true; + } else { + t = e; + u = e; + return false; + } + } else { + t = e; + u = e; + return false; + } + }; + virtual void shiftCursor () { + b = u; + }; +}; + +#endif /* UTIL_SPLITTER_H */ diff --git a/lib/util_string.cc b/lib/util_string.cc index 52ce8dc..e78523a 100644 --- a/lib/util_string.cc +++ b/lib/util_string.cc @@ -1,6 +1,7 @@ #include "util_string.h" #include "util_const.h" #include "util_random.h" +#include "util_splitter.h" #include "ml.h" #include "mlenv.h" #include "motorenv.h" @@ -317,42 +318,6 @@ bool splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& return false; } -static char Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -ustring base64Encode (uiterator b, uiterator e) { - ustring ans; - size_t size; - int c0, c1, c2; - - while (b != e) { - size = e - b; - if (size >= 3) { - c0 = *b ++; - c1 = *b ++; - c2 = *b ++; - ans.append (1, Base64Char[(c0 >> 2) & 0x3f]); - ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]); - ans.append (1, Base64Char[((c1 & 0x0f) << 2) | ((c2 >> 6) & 0x03)]); - ans.append (1, Base64Char[c2 & 0x3f]); - } else if (size == 2) { - c0 = *b ++; - c1 = *b ++; - ans.append (1, Base64Char[(c0 >> 2) & 0x3f]); - ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]); - ans.append (1, Base64Char[((c1 & 0x0f) << 2)]); - ans.append (1, '='); - } else if (size == 1) { - c0 = *b ++; - ans.append (1, Base64Char[(c0 >> 2) & 0x3f]); - ans.append (1, Base64Char[((c0 & 0x03) << 4)]); - ans.append (1, '='); - ans.append (1, '='); - } else { - break; - } - } - return ans; -} - ustring escape_re (const ustring& text) { ustring::const_iterator b, e; umatch m; diff --git a/lib/util_string.h b/lib/util_string.h index 53c6a23..b1291ce 100644 --- a/lib/util_string.h +++ b/lib/util_string.h @@ -2,7 +2,6 @@ #define UTIL_STRING_H #include "ustring.h" -#include "utf16.h" #include #include #include @@ -10,154 +9,6 @@ class MlEnv; -class Splitter { - public: 
- uregex* re; - uiterator b; - uiterator t; - uiterator u; - uiterator e; - umatch m; - - Splitter (const ustring& text, uregex& r) { - b = t = u = text.begin (); - e = text.end (); - re = &r; - }; - Splitter (uiterator pb, uiterator pe, uregex& r) { - b = t = u = pb; - e = pe; - re = &r; - }; - virtual ~Splitter () {}; - virtual void init (uiterator pb, uiterator pe) { - b = t = u = pb; - e = pe; - }; - virtual bool isEnd () { - return b == e; - }; - virtual bool next () { - b = u; - if (b != e) { - if (usearch (b, e, m, *re)) { - t = m[0].first; - u = m[0].second; - } else { - t = e; - u = e; - } - return true; - } else { - return false; - } - }; - virtual bool nextSep () { - b = u; - if (b != e) { - if (usearch (b, e, m, *re)) { - t = m[0].first; - u = m[0].second; - return true; - } else { - t = e; - u = e; - return false; - } - } else { - return false; - } - }; - virtual uiterator begin () { - return b; - }; - virtual uiterator end () { - return t; - }; - virtual ustring cur () { - return ustring (b, t); - }; - virtual bool match (int index) { - return (t != u && m[index].matched); - } - virtual uiterator matchBegin () { - return t; - }; - virtual uiterator matchBegin (int index) { - return m[index].first; - }; - virtual uiterator matchEnd () { - return u; - }; - virtual uiterator matchEnd (int index) { - return m[index].second; - }; - virtual uiterator eol () { - return e; - }; - virtual void rewind (int i) { - int n = u - t; - if (n > i) { - u -= i; - } else { - u -= n; - } - }; - virtual bool nextSearch () { - if (u != e) { - if (usearch (u, e, m, *re)) { - t = m[0].first; - u = m[0].second; - return true; - } else { - t = e; - u = e; - return false; - } - } else { - t = e; - u = e; - return false; - } - }; - virtual void shiftCursor () { - b = u; - }; -}; - -class WSplitter { - public: - boost::wregex* re; - std::wstring::const_iterator b, t, u, e; - boost::wsmatch m; - - WSplitter (const std::wstring& text, boost::wregex& r) { - b = t = u = text.begin (); 
- e = text.end (); - re = &r; - }; - virtual ~WSplitter () {}; - virtual bool next () { - b = u; - if (b != e) { - if (regex_search (b, e, m, *re, boost::regex_constants::match_single_line)) { - t = m[0].first; - u = m[0].second; - } else { - t = e; - u = e; - } - return true; - } else { - return false; - } - }; - virtual ustring cur () { - std::wstring x (b, t); - return wtou (x); - }; -}; - ustring c3 (const ustring& str); inline ustring to_ustring (int32_t v) { return boost::lexical_cast (v); @@ -200,7 +51,6 @@ ustring dirPart (const ustring& path); ustring filePart_osSafe (const ustring& path); void split (uiterator b, uiterator e, uregex& re, std::vector& ans); bool splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1); -ustring base64Encode (uiterator b, uiterator e); ustring filenameEncode (const ustring& text); ustring filenameDecode (const ustring& text); ustring escape_re (const ustring& text); diff --git a/lib/util_wsplitter.h b/lib/util_wsplitter.h new file mode 100644 index 0000000..41f2f8d --- /dev/null +++ b/lib/util_wsplitter.h @@ -0,0 +1,40 @@ +#ifndef UTIL_WSPLITTER_H +#define UTIL_WSPLITTER_H + +#include "ustring.h" +#include "utf16.h" + +class WSplitter { + public: + boost::wregex* re; + std::wstring::const_iterator b, t, u, e; + boost::wsmatch m; + + WSplitter (const std::wstring& text, boost::wregex& r) { + b = t = u = text.begin (); + e = text.end (); + re = &r; + }; + virtual ~WSplitter () {}; + virtual bool next () { + b = u; + if (b != e) { + if (regex_search (b, e, m, *re, boost::regex_constants::match_single_line)) { + t = m[0].first; + u = m[0].second; + } else { + t = e; + u = e; + } + return true; + } else { + return false; + } + }; + virtual ustring cur () { + std::wstring x (b, t); + return wtou (x); + }; +}; + +#endif /* UTIL_WSPLITTER_H */ diff --git a/ml/Makefile b/ml/Makefile index d0b39d2..6ef9eb6 100644 --- a/ml/Makefile +++ b/ml/Makefile @@ -20,6 +20,7 @@ SRCS += sigsafe.cc SRCS += utf8.cc SRCS += 
utf16.cc SRCS += util_apache.cc +SRCS += util_base64.cc SRCS += util_check.cc SRCS += util_const.cc SRCS += util_file.cc diff --git a/modules/ml-formvar.cc b/modules/ml-formvar.cc index dfe7042..dcdc442 100644 --- a/modules/ml-formvar.cc +++ b/modules/ml-formvar.cc @@ -4,11 +4,12 @@ #include "mlenv.h" #include "formfile.h" #include "motorenv.h" -#include "util_string.h" #include "util_const.h" #include "util_check.h" +#include "util_string.h" #include "expr.h" #include "utf8.h" +#include "utf16.h" #include #include #include diff --git a/modules/ml-http.cc b/modules/ml-http.cc index 2210d23..67a60f0 100644 --- a/modules/ml-http.cc +++ b/modules/ml-http.cc @@ -315,17 +315,22 @@ MNode* ml_http_get (MNode* cell, MlEnv* mlenv) { if (keywords[5]) // cookie cookie = eval (keywords[5], mlenv); if (keywords[6]) { // proxy-host - obj.http.host = omitNonAsciiWord (eval_str (keywords[6], mlenv)); - obj.http.useproxy = true; +// obj.http.host = omitNonAsciiWord (eval_str (keywords[6], mlenv)); + MNodePtr h; + h = eval (keywords[6], mlenv); + if (! 
isNil (h ())) { + obj.http.host = omitNonAsciiWord (to_string (h ())); + obj.http.useproxy = true; + if (keywords[7]) // proxy-port + obj.http.port = omitNonAsciiWord (eval_str (keywords[7], mlenv)); + if (keywords[8]) // proxyid + obj.http.proxyid = omitCtrl (eval_str (keywords[8], mlenv)); + if (keywords[9]) // proxypassword + obj.http.proxypw = omitCtrl (eval_str (keywords[9], mlenv)); + if (keywords[10]) // proxypw + obj.http.proxypw = omitCtrl (eval_str (keywords[10], mlenv)); + } } - if (keywords[7]) // proxy-port - obj.http.port = omitNonAsciiWord (eval_str (keywords[7], mlenv)); - if (keywords[8]) // proxyid - obj.http.proxyid = omitCtrl (eval_str (keywords[8], mlenv)); - if (keywords[9]) // proxypassword - obj.http.proxypw = omitCtrl (eval_str (keywords[9], mlenv)); - if (keywords[10]) // proxypw - obj.http.proxypw = omitCtrl (eval_str (keywords[10], mlenv)); url_sub (url, obj.http); if (obj.http.proto.empty ()) { diff --git a/modules/ml-string.cc b/modules/ml-string.cc index b176d1c..629bc07 100644 --- a/modules/ml-string.cc +++ b/modules/ml-string.cc @@ -7,6 +7,7 @@ #include "util_check.h" #include "util_random.h" #include "util_string.h" +#include "util_wsplitter.h" #include "expr.h" #include "utf8.h" #include "utf16.h" diff --git a/wiki/wikiformat.h b/wiki/wikiformat.h index 4d91bc3..4b909c5 100644 --- a/wiki/wikiformat.h +++ b/wiki/wikiformat.h @@ -9,6 +9,7 @@ #include "mlenv.h" #include "ftable.h" #include "ustring.h" +#include "util_splitter.h" #include "util_string.h" #include #include diff --git a/wiki/wikimotor.h b/wiki/wikimotor.h index c8da013..a4ec4a4 100644 --- a/wiki/wikimotor.h +++ b/wiki/wikimotor.h @@ -5,6 +5,7 @@ #include "wikienv.h" #include "motor.h" #include "ml.h" +#include "util_splitter.h" #include "util_string.h" #include "ustring.h" #include -- 2.11.0