From c1bd5fbc2540821b5d18a1edfe1acede0c148874 Mon Sep 17 00:00:00 2001
From: visor <visor@users.sourceforge.jp>
Date: Thu, 23 Sep 2010 00:40:15 +0900
Subject: [PATCH] fix string utils.

---
 ext/ml-sqlite3.cc     |   1 +
 lib/http.cc           |   4 +-
 lib/iso2022jp.cc      |   1 +
 lib/ml.cc             |   1 +
 lib/motor.cc          |   3 +-
 lib/utf8.cc           |   3 +-
 lib/util_apache.cc    |   1 +
 lib/util_base64.cc    |  89 ++++++++++++++++++++++++++++++
 lib/util_base64.h     |   9 +++
 lib/util_splitter.h   | 121 ++++++++++++++++++++++++++++++++++++++++
 lib/util_string.cc    |  37 +------------
 lib/util_string.h     | 150 --------------------------------------------------
 lib/util_wsplitter.h  |  40 ++++++++++++++
 ml/Makefile           |   1 +
 modules/ml-formvar.cc |   3 +-
 modules/ml-http.cc    |  25 +++++----
 modules/ml-string.cc  |   1 +
 wiki/wikiformat.h     |   1 +
 wiki/wikimotor.h      |   1 +
 19 files changed, 292 insertions(+), 200 deletions(-)
 create mode 100644 lib/util_base64.cc
 create mode 100644 lib/util_base64.h
 create mode 100644 lib/util_splitter.h
 create mode 100644 lib/util_wsplitter.h
diff --git a/ext/ml-sqlite3.cc b/ext/ml-sqlite3.cc
index 2cc5fbe..91edb4a 100644
--- a/ext/ml-sqlite3.cc
+++ b/ext/ml-sqlite3.cc
@@ -7,6 +7,7 @@
 #include "expr.h"
 #include "util_const.h"
 #include "util_file.h"
+#include "util_splitter.h"
 #include "util_string.h"
 #include "utf8.h"
 #include <exception>
diff --git a/lib/http.cc b/lib/http.cc
index 57ccfb8..2568d69 100644
--- a/lib/http.cc
+++ b/lib/http.cc
@@ -4,9 +4,11 @@
 #include "motoroutput.h"
 #include "util_string.h"
 #include "util_time.h"
+#include "util_base64.h"
 #include "util_check.h"
 #include "util_const.h"
 #include "util_random.h"
+#include "util_splitter.h"
 #include "util_tcp.h"
 #include "ustring.h"
 #include "utf8.h"
@@ -337,7 +339,7 @@ ustring  HTTPSend::query () {
 	idpw.assign (id).append (uColon).append (pw);
 	q.append (CharConst ("Authorization: Basic ")).append (base64Encode (idpw.begin (), idpw.end ())).append (uCRLF);
     }
-    if (proxyid.length () > 0) {
+    if (useproxy && proxyid.length () > 0) {
 	ustring idpw;
 	idpw.assign (proxyid).append (uColon).append (proxypw);
 	q.append (CharConst ("Proxy-Authorization: Basic ")).append (base64Encode (idpw.begin (), idpw.end ())).append (uCRLF);
diff --git a/lib/iso2022jp.cc b/lib/iso2022jp.cc
index eba6f2b..3ca997a 100644
--- a/lib/iso2022jp.cc
+++ b/lib/iso2022jp.cc
@@ -1,4 +1,5 @@
 #include "iso2022jp.h"
+#include "util_base64.h"
 #include "util_string.h"
 #include "ustring.h"
 
diff --git a/lib/ml.cc b/lib/ml.cc
index d853287..fc3eb09 100644
--- a/lib/ml.cc
+++ b/lib/ml.cc
@@ -3,6 +3,7 @@
 #include "mlenv.h"
 #include "expr.h"
 #include "utf8.h"
+#include "utf16.h"
 #include "ustring.h"
 #include "util_string.h"
 #include "util_const.h"
diff --git a/lib/motor.cc b/lib/motor.cc
index 658b559..67467a4 100644
--- a/lib/motor.cc
+++ b/lib/motor.cc
@@ -4,8 +4,9 @@
 #include "mlenv.h"
 #include "expr.h"
 #include "util_const.h"
-#include "util_string.h"
 #include "util_file.h"
+#include "util_splitter.h"
+#include "util_string.h"
 #include "ustring.h"
 #include "mftable.h"
 #include "motorconst.h"
diff --git a/lib/utf8.cc b/lib/utf8.cc
index 586ad1e..423adba 100644
--- a/lib/utf8.cc
+++ b/lib/utf8.cc
@@ -15,7 +15,8 @@ ustring  fixUTF8 (const ustring& str) {
     ans.reserve (n);
     for (i = 0; i < n;) {
 	c = str[i ++];
-	if (c <= 0x7f) {
+	if (c == 0) {
+	} else if (c <= 0x7f) {
 	    if (c == '\r') {
 		ans.append (1, '\n');
 		if (i < n && str[i] == '\n') {
diff --git a/lib/util_apache.cc b/lib/util_apache.cc
index 8f5cc0e..9d1307e 100644
--- a/lib/util_apache.cc
+++ b/lib/util_apache.cc
@@ -1,6 +1,7 @@
 #include "util_apache.h"
 #include "util_const.h"
 #include "util_file.h"
+#include "util_splitter.h"
 #include "util_string.h"
 #include "httpconst.h"
 #include "ustring.h"
diff --git a/lib/util_base64.cc b/lib/util_base64.cc
new file mode 100644
index 0000000..59fedf9
--- /dev/null
+++ b/lib/util_base64.cc
@@ -0,0 +1,89 @@
+#include "util_base64.h"
+#include "ustring.h"
+#include <string.h>
+
+static const char  Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";	// 6-bit value (0..63) -> Base64 character; read-only lookup table
+
+// Encode the bytes in [b, e) as Base64 text.
+// Every 3 input bytes yield 4 characters taken from Base64Char.  A trailing
+// group of 1 or 2 bytes is completed with '=' so that the output length is
+// always a multiple of 4.  No line breaks are inserted.
+ustring  base64Encode (uiterator b, uiterator e) {
+    ustring  out;
+    int  x0, x1, x2;
+
+    // rest is the number of bytes still unread at the top of each pass.
+    for (size_t rest = e - b; rest > 0; rest = e - b) {
+	// The first character is always the top 6 bits of the first byte.
+	x0 = *b ++;
+	out.append (1, Base64Char[(x0 >> 2) & 0x3f]);
+	if (rest == 1) {
+	    // One trailing byte: two characters, then two pads.
+	    out.append (1, Base64Char[((x0 & 0x03) << 4)]);
+	    out.append (2, '=');
+	    continue;
+	}
+	x1 = *b ++;
+	out.append (1, Base64Char[((x0 & 0x03) << 4) | ((x1 >> 4) & 0x0f)]);
+	if (rest == 2) {
+	    // Two trailing bytes: three characters, then one pad.
+	    out.append (1, Base64Char[((x1 & 0x0f) << 2)]);
+	    out.append (1, '=');
+	    continue;
+	}
+	// Full group of three bytes.
+	x2 = *b ++;
+	out.append (1, Base64Char[((x1 & 0x0f) << 2) | ((x2 >> 6) & 0x03)]);
+	out.append (1, Base64Char[x2 & 0x3f]);
+    }
+    return out;
+}
+
+// Decode Base64 text in [b, e) and return the decoded bytes.
+//
+// Whitespace (as classified by isspace) is skipped wherever it occurs, so
+// input whose line breaks fall inside a 4-character group decodes as well.
+// Decoding stops at the first other character that is not in Base64Char;
+// this is how '=' padding ends the data.  Everything decoded before that
+// point is returned.  A final group that lacks its padding still yields
+// the bytes it completes (2 characters -> 1 byte, 3 characters -> 2 bytes).
+//
+// Returns an empty string when no complete byte could be decoded.
+ustring  base64Decode (uiterator b, uiterator e) {
+    ustring  ans;
+    const char*  p;
+    unsigned char  c;
+    u_int  acc = 0;		// most recent sextets, newest in the low bits
+    int  nbits = 0;		// bits of acc not yet written to ans
+
+    for (; b != e; ++ b) {
+	// Go through unsigned char: passing isspace a negative value
+	// (a signed char of 0x80 or above) is undefined behaviour.
+	c = static_cast<unsigned char> (*b);
+	if (isspace (c)) {
+	    continue;
+	}
+
+	// The searched length excludes the terminating NUL of the table,
+	// so a '\0' byte in the input is rejected like any other
+	// non-alphabet character.
+	p = static_cast<const char*> (memchr (Base64Char, c, sizeof (Base64Char) - 1));
+	if (p == NULL) {
+	    break;
+	}
+
+	// Append the 6-bit value.  Only the last two sextets are kept;
+	// anything older has already been written out.
+	acc = ((acc << 6) | static_cast<u_int> (p - Base64Char)) & 0xfff;
+	nbits += 6;
+
+	// With 8 or more pending bits, the oldest 8 form the next byte.
+	// nbits then cycles 6 -> 4 -> 2 -> 0 over each 4-character group.
+	if (nbits >= 8) {
+	    nbits -= 8;
+	    ans.append (1, static_cast<char> ((acc >> nbits) & 0xff));
+	}
+    }
+    return ans;
+}
+
diff --git a/lib/util_base64.h b/lib/util_base64.h
new file mode 100644
index 0000000..62267e6
--- /dev/null
+++ b/lib/util_base64.h
@@ -0,0 +1,9 @@
+#ifndef UTIL_BASE64_H
+#define UTIL_BASE64_H
+
+#include "ustring.h"
+
+ustring  base64Encode (uiterator b, uiterator e);	// Base64-encode the bytes in [b, e); output is '=' padded
+ustring  base64Decode (uiterator b, uiterator e);	// decode Base64 text in [b, e) back into bytes
+
+#endif /* UTIL_BASE64_H */
diff --git a/lib/util_splitter.h b/lib/util_splitter.h
new file mode 100644
index 0000000..cc3117d
--- /dev/null
+++ b/lib/util_splitter.h
@@ -0,0 +1,121 @@
+#ifndef UTIL_SPLITTER_H
+#define UTIL_SPLITTER_H
+
+#include "ustring.h"
+
+// Cursor over a character range that a regex cuts into fields.
+//   [b, t)  current field (the text in front of the separator)
+//   [t, u)  separator found by the last search (empty when none was found)
+//   e       end of the whole range
+//   m       sub-matches of the last successful search
+// The object stores iterators into the caller's text and a pointer to the
+// caller's regex; it copies neither.
+class  Splitter {
+ public:
+    uregex*  re;
+    uiterator  b;
+    uiterator  t;
+    uiterator  u;
+    uiterator  e;
+    umatch  m;
+
+    Splitter (const ustring& text, uregex& r)
+	: re (&r), b (text.begin ()), t (text.begin ()), u (text.begin ()), e (text.end ()) {}
+    Splitter (uiterator pb, uiterator pe, uregex& r)
+	: re (&r), b (pb), t (pb), u (pb), e (pe) {}
+    virtual  ~Splitter () {}
+
+    // Point the cursor at a new range; the regex is kept.
+    virtual void  init (uiterator pb, uiterator pe) {
+	e = pe;
+	b = t = u = pb;
+    }
+    // True when the current field starts at the end of the range.
+    virtual bool  isEnd () {
+	return b == e;
+    }
+    // Advance to the field that follows the last separator.  Returns false
+    // only when no text is left; a last field without a separator counts.
+    virtual bool  next () {
+	b = u;
+	if (b == e)
+	    return false;
+	if (usearch (b, e, m, *re)) {
+	    t = m[0].first;
+	    u = m[0].second;
+	} else {
+	    t = u = e;
+	}
+	return true;
+    }
+    // Same stepping as next (), but the result tells whether a separator
+    // was found after the field.
+    virtual bool  nextSep () {
+	b = u;
+	if (b == e)
+	    return false;
+	if (! usearch (b, e, m, *re)) {
+	    t = u = e;
+	    return false;
+	}
+	t = m[0].first;
+	u = m[0].second;
+	return true;
+    }
+    // Bounds and a copy of the current field [b, t).
+    virtual uiterator  begin () {
+	return b;
+    }
+    virtual uiterator  end () {
+	return t;
+    }
+    virtual ustring  cur () {
+	return ustring (b, t);
+    }
+    // True when the separator is non-empty and sub-match index matched.
+    virtual bool  match (int index) {
+	if (t == u)
+	    return false;
+	return m[index].matched;
+    }
+    // Bounds of the whole separator, or of one of its sub-matches.
+    virtual uiterator  matchBegin () {
+	return t;
+    }
+    virtual uiterator  matchBegin (int index) {
+	return m[index].first;
+    }
+    virtual uiterator  matchEnd () {
+	return u;
+    }
+    virtual uiterator  matchEnd (int index) {
+	return m[index].second;
+    }
+    // End of the whole range.
+    virtual uiterator  eol () {
+	return e;
+    }
+    // Pull the separator end back by i characters, or by the separator's
+    // whole length when it is not longer than i.
+    virtual void  rewind (int i) {
+	int  len = u - t;
+	u -= (len > i) ? i : len;
+    }
+    // Search again from the separator end without moving b.  Returns
+    // whether a match was found; without one, t and u are set to e.
+    virtual bool  nextSearch () {
+	if (u != e && usearch (u, e, m, *re)) {
+	    t = m[0].first;
+	    u = m[0].second;
+	    return true;
+	}
+	t = u = e;
+	return false;
+    }
+    // Start the current field at the separator end.
+    virtual void  shiftCursor () {
+	b = u;
+    }
+};
+
+#endif /* UTIL_SPLITTER_H */
diff --git a/lib/util_string.cc b/lib/util_string.cc
index 52ce8dc..e78523a 100644
--- a/lib/util_string.cc
+++ b/lib/util_string.cc
@@ -1,6 +1,7 @@
 #include "util_string.h"
 #include "util_const.h"
 #include "util_random.h"
+#include "util_splitter.h"
 #include "ml.h"
 #include "mlenv.h"
 #include "motorenv.h"
@@ -317,42 +318,6 @@ bool  splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator&
     return false;
 }
 
-static char  Base64Char[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-ustring  base64Encode (uiterator b, uiterator e) {
-    ustring  ans;
-    size_t  size;
-    int  c0, c1, c2;
-
-    while (b != e) {
-	size = e - b;
-	if (size >= 3) {
-	    c0 = *b ++;
-	    c1 = *b ++;
-	    c2 = *b ++;
-	    ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
-	    ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
-	    ans.append (1, Base64Char[((c1 & 0x0f) << 2) | ((c2 >> 6) & 0x03)]);
-	    ans.append (1, Base64Char[c2 & 0x3f]);
-	} else if (size == 2) {
-	    c0 = *b ++;
-	    c1 = *b ++;
-	    ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
-	    ans.append (1, Base64Char[((c0 & 0x03) << 4) | ((c1 >> 4) & 0x0f)]);
-	    ans.append (1, Base64Char[((c1 & 0x0f) << 2)]);
-	    ans.append (1, '=');
-	} else if (size == 1) {
-	    c0 = *b ++;
-	    ans.append (1, Base64Char[(c0 >> 2) & 0x3f]);
-	    ans.append (1, Base64Char[((c0 & 0x03) << 4)]);
-	    ans.append (1, '=');
-	    ans.append (1, '=');
-	} else {
-	    break;
-	}
-    }
-    return ans;
-}
-
 ustring  escape_re (const ustring& text) {
     ustring::const_iterator  b, e;
     umatch  m;
diff --git a/lib/util_string.h b/lib/util_string.h
index 53c6a23..b1291ce 100644
--- a/lib/util_string.h
+++ b/lib/util_string.h
@@ -2,7 +2,6 @@
 #define UTIL_STRING_H
 
 #include "ustring.h"
-#include "utf16.h"
 #include <time.h>
 #include <vector>
 #include <boost/ptr_container/ptr_vector.hpp>
@@ -10,154 +9,6 @@
 
 class  MlEnv;
 
-class  Splitter {
- public:
-    uregex*  re;
-    uiterator  b;
-    uiterator  t;
-    uiterator  u;
-    uiterator  e;
-    umatch  m;
-
-    Splitter (const ustring& text, uregex& r) {
-	b = t = u = text.begin ();
-	e = text.end ();
-	re = &r;
-    };
-    Splitter (uiterator pb, uiterator pe, uregex& r) {
-	b = t = u = pb;
-	e = pe;
-	re = &r;
-    };
-    virtual  ~Splitter () {};
-    virtual void  init (uiterator pb, uiterator pe) {
-	b = t = u = pb;
-	e = pe;
-    };
-    virtual bool  isEnd () {
-	return b == e;
-    };
-    virtual bool  next () {
-	b = u;
-	if (b != e) {
-	    if (usearch (b, e, m, *re)) {
-		t = m[0].first;
-		u = m[0].second;
-	    } else {
-		t = e;
-		u = e;
-	    }
-	    return true;
-	} else {
-	    return false;
-	}
-    };
-    virtual bool  nextSep () {
-	b = u;
-	if (b != e) {
-	    if (usearch (b, e, m, *re)) {
-		t = m[0].first;
-		u = m[0].second;
-		return true;
-	    } else {
-		t = e;
-		u = e;
-		return false;
-	    }
-	} else {
-	    return false;
-	}
-    };
-    virtual uiterator  begin () {
-	return b;
-    };
-    virtual uiterator  end () {
-	return t;
-    };
-    virtual ustring  cur () {
-	return ustring (b, t);
-    };
-    virtual bool  match (int index) {
-	return (t != u && m[index].matched);
-    }
-    virtual uiterator  matchBegin () {
-	return t;
-    };
-    virtual uiterator  matchBegin (int index) {
-	return m[index].first;
-    };
-    virtual uiterator  matchEnd () {
-	return u;
-    };
-    virtual uiterator  matchEnd (int index) {
-	return m[index].second;
-    };
-    virtual uiterator  eol () {
-	return e;
-    };
-    virtual void  rewind (int i) {
-	int  n = u - t;
-	if (n > i) {
-	    u -= i;
-	} else {
-	    u -= n;
-	}
-    };
-    virtual bool  nextSearch () {
-	if (u != e) {
-	    if (usearch (u, e, m, *re)) {
-		t = m[0].first;
-		u = m[0].second;
-		return true;
-	    } else {
-		t = e;
-		u = e;
-		return false;
-	    }
-	} else {
-	    t = e;
-	    u = e;
-	    return false;
-	}
-    };
-    virtual void  shiftCursor () {
-	b = u;
-    };
-};
-
-class  WSplitter {
- public:
-    boost::wregex*  re;
-    std::wstring::const_iterator  b, t, u, e;
-    boost::wsmatch  m;
-
-    WSplitter (const std::wstring& text, boost::wregex& r) {
-	b = t = u = text.begin ();
-	e = text.end ();
-	re = &r;
-    };
-    virtual  ~WSplitter () {};
-    virtual bool  next () {
-	b = u;
-	if (b != e) {
-	    if (regex_search (b, e, m, *re, boost::regex_constants::match_single_line)) {
-		t = m[0].first;
-		u = m[0].second;
-	    } else {
-		t = e;
-		u = e;
-	    }
-	    return true;
-	} else {
-	    return false;
-	}
-    };
-    virtual ustring  cur () {
-	std::wstring  x (b, t);
-	return wtou (x);
-    };
-};
-
 ustring  c3 (const ustring& str);
 inline ustring  to_ustring (int32_t v) {
     return boost::lexical_cast<ustring> (v);
@@ -200,7 +51,6 @@ ustring  dirPart (const ustring& path);
 ustring  filePart_osSafe (const ustring& path);
 void  split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans);
 bool  splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1);
-ustring  base64Encode (uiterator b, uiterator e);
 ustring  filenameEncode (const ustring& text);
 ustring  filenameDecode (const ustring& text);
 ustring  escape_re (const ustring& text);
diff --git a/lib/util_wsplitter.h b/lib/util_wsplitter.h
new file mode 100644
index 0000000..41f2f8d
--- /dev/null
+++ b/lib/util_wsplitter.h
@@ -0,0 +1,40 @@
+#ifndef UTIL_WSPLITTER_H
+#define UTIL_WSPLITTER_H
+
+#include "ustring.h"
+#include "utf16.h"
+
+// Wide-character field splitter: [b, t) is the current field, [t, u) the
+// separator found by the last search, e the end of the text.  Iterators
+// into the caller's string and a pointer to its regex are kept, not copies.
+class  WSplitter {
+ public:
+    boost::wregex*  re;
+    std::wstring::const_iterator  b, t, u, e;
+    boost::wsmatch  m;
+
+    WSplitter (const std::wstring& text, boost::wregex& r)
+	: re (&r), b (text.begin ()), t (text.begin ()), u (text.begin ()), e (text.end ()) {}
+    virtual  ~WSplitter () {}
+    // Advance to the next field.  Returns false only when no text is left;
+    // when the regex finds nothing, the field runs to the end of the text.
+    virtual bool  next () {
+	b = u;
+	if (b == e)
+	    return false;
+	if (regex_search (b, e, m, *re, boost::regex_constants::match_single_line)) {
+	    t = m[0].first;
+	    u = m[0].second;
+	} else {
+	    t = u = e;
+	}
+	return true;
+    }
+    // The current field converted by wtou ().
+    virtual ustring  cur () {
+	std::wstring  field (b, t);
+	return wtou (field);
+    }
+};
+
+#endif /* UTIL_WSPLITTER_H */
diff --git a/ml/Makefile b/ml/Makefile
index d0b39d2..6ef9eb6 100644
--- a/ml/Makefile
+++ b/ml/Makefile
@@ -20,6 +20,7 @@ SRCS += sigsafe.cc
 SRCS += utf8.cc
 SRCS += utf16.cc
 SRCS += util_apache.cc
+SRCS += util_base64.cc
 SRCS += util_check.cc
 SRCS += util_const.cc
 SRCS += util_file.cc
diff --git a/modules/ml-formvar.cc b/modules/ml-formvar.cc
index dfe7042..dcdc442 100644
--- a/modules/ml-formvar.cc
+++ b/modules/ml-formvar.cc
@@ -4,11 +4,12 @@
 #include "mlenv.h"
 #include "formfile.h"
 #include "motorenv.h"
-#include "util_string.h"
 #include "util_const.h"
 #include "util_check.h"
+#include "util_string.h"
 #include "expr.h"
 #include "utf8.h"
+#include "utf16.h"
 #include <exception>
 #include <iostream>
 #include <assert.h>
diff --git a/modules/ml-http.cc b/modules/ml-http.cc
index 2210d23..67a60f0 100644
--- a/modules/ml-http.cc
+++ b/modules/ml-http.cc
@@ -315,17 +315,22 @@ MNode*  ml_http_get (MNode* cell, MlEnv* mlenv) {
     if (keywords[5])		// cookie
 	cookie = eval (keywords[5], mlenv);
     if (keywords[6]) {		// proxy-host
-	obj.http.host = omitNonAsciiWord (eval_str (keywords[6], mlenv));
-	obj.http.useproxy = true;
+//	obj.http.host = omitNonAsciiWord (eval_str (keywords[6], mlenv));
+	MNodePtr  h;
+	h = eval (keywords[6], mlenv);
+	if (! isNil (h ())) {
+	    obj.http.host = omitNonAsciiWord (to_string (h ()));
+	    obj.http.useproxy = true;
+	    if (keywords[7])		// proxy-port
+		obj.http.port = omitNonAsciiWord (eval_str (keywords[7], mlenv));
+	    if (keywords[8])		// proxyid
+		obj.http.proxyid = omitCtrl (eval_str (keywords[8], mlenv));
+	    if (keywords[9])		// proxypassword
+		obj.http.proxypw = omitCtrl (eval_str (keywords[9], mlenv));
+	    if (keywords[10])		// proxypw
+		obj.http.proxypw = omitCtrl (eval_str (keywords[10], mlenv));
+	}
     }
-    if (keywords[7])		// proxy-port
-	obj.http.port = omitNonAsciiWord (eval_str (keywords[7], mlenv));
-    if (keywords[8])		// proxyid
-	obj.http.proxyid = omitCtrl (eval_str (keywords[8], mlenv));
-    if (keywords[9])		// proxypassword
-	obj.http.proxypw = omitCtrl (eval_str (keywords[9], mlenv));
-    if (keywords[10])		// proxypw
-	obj.http.proxypw = omitCtrl (eval_str (keywords[10], mlenv));
     
     url_sub (url, obj.http);
     if (obj.http.proto.empty ()) {
diff --git a/modules/ml-string.cc b/modules/ml-string.cc
index b176d1c..629bc07 100644
--- a/modules/ml-string.cc
+++ b/modules/ml-string.cc
@@ -7,6 +7,7 @@
 #include "util_check.h"
 #include "util_random.h"
 #include "util_string.h"
+#include "util_wsplitter.h"
 #include "expr.h"
 #include "utf8.h"
 #include "utf16.h"
diff --git a/wiki/wikiformat.h b/wiki/wikiformat.h
index 4d91bc3..4b909c5 100644
--- a/wiki/wikiformat.h
+++ b/wiki/wikiformat.h
@@ -9,6 +9,7 @@
 #include "mlenv.h"
 #include "ftable.h"
 #include "ustring.h"
+#include "util_splitter.h"
 #include "util_string.h"
 #include <boost/ptr_container/ptr_vector.hpp>
 #include <vector>
diff --git a/wiki/wikimotor.h b/wiki/wikimotor.h
index c8da013..a4ec4a4 100644
--- a/wiki/wikimotor.h
+++ b/wiki/wikimotor.h
@@ -5,6 +5,7 @@
 #include "wikienv.h"
 #include "motor.h"
 #include "ml.h"
+#include "util_splitter.h"
 #include "util_string.h"
 #include "ustring.h"
 #include <boost/ptr_container/ptr_vector.hpp>
-- 
2.11.0