reduce regex's.

author visor <visor@users.sourceforge.jp>

Mon, 16 Mar 2015 15:44:30 +0000 (00:44 +0900)

committer visor <visor@users.sourceforge.jp>

Tue, 17 Mar 2015 13:11:15 +0000 (22:11 +0900)
author visor <visor@users.sourceforge.jp>
Mon, 16 Mar 2015 15:44:30 +0000 (00:44 +0900)
committer visor <visor@users.sourceforge.jp>
Tue, 17 Mar 2015 13:11:15 +0000 (22:11 +0900)
diff --git a/cgi/main.cc b/cgi/main.cc

index ac816ba..a8b050c 100644 (file)
--- a/cgi/main.cc
+++ b/cgi/main.cc
@@ -14,6 +14,7 @@
  #include "util_string.h"
  #include "util_file.h"
  #include "util_mimetype.h"
+#include "util_splitter.h"
  #include "ustring.h"
  #include <iostream>
  #include <stdlib.h>
@@ -34,14 +35,18 @@ static ustring  glue1 (MotorEnv* env) {
  
      p = getenvString (kPATH_INFO);
      {
-       uiterator  b = p.begin ();
-       uiterator  e = p.end ();
-       umatch  m;
-       while (usearch (b, e, m, re_slash)) {
-           a.push_back (ustring (b, m[0].first));
-           b = m[0].second;
+//     uiterator  b = p.begin ();
+//     uiterator  e = p.end ();
+//     umatch  m;
+//     while (usearch (b, e, m, re_slash)) {
+//         a.push_back (ustring (b, m[0].first));
+//         b = m[0].second;
+//     }
+//     a.push_back (ustring (b, e));
+       SplitterCh  sp (p, '/');
+       while (sp.next ()) {
+           a.push_back (sp.pre ());
         }
-       a.push_back (ustring (b, e));
      }
      if (cwd) {
         scriptfilename = ustring (cwd);
diff --git a/ext/ml-sqlite3.cc b/ext/ml-sqlite3.cc

index 4521e98..bbf61c5 100644 (file)
--- a/ext/ml-sqlite3.cc
+++ b/ext/ml-sqlite3.cc
@@ -510,23 +510,19 @@ MNode*  ml_sqlite3_rowid (MNode* cell, MlEnv* mlenv, MLFunc* mobj) {
  }
  
  static ustring  escape_like (const ustring& str) {
-    static uregex  re ("[%_\\\\]");
-    Splitter  sp (str, re);
-    ustring  ans;
-
-    while (sp.next ()) {
-       ans.append (sp.begin (), sp.end ());
-       switch (*sp.matchBegin ()) {
-       case '%':
-       case '_':
-       case '\\':
+    SplitterChars  sp (str, ustring (CharConst ("%_\\")));
+    if (sp.nextSep ()) {
+       ustring  ans;
+       do {
+           ans.append (sp.pre ());
             ans.append (CharConst ("\\"));
             ans.append (sp.matchBegin (), sp.matchEnd ());
-           break;
-       default:;
-       }
+       } while (sp.nextSep ());
+       ans.append (sp.pre ());
+       return ans;
+    } else {
+       return str;
      }
-    return ans;
  }
  
  /*DOC:
diff --git a/ext/ml-tcpserver.cc b/ext/ml-tcpserver.cc

index 9e511cc..22a413f 100644 (file)
--- a/ext/ml-tcpserver.cc
+++ b/ext/ml-tcpserver.cc
@@ -53,7 +53,8 @@ void  MLDbTcpserver::addAllow (const ustring& key, time_t span, MNode* rest, MlE
         while (rest) {
             estr = eval_str (rest->car (), mlenv);
             nextNode (rest);
-           if (! checkASCII (estr) || estr.length () > 1024)
+//         if (! checkASCII (estr) || estr.length () > 1024)
+           if (! matchASCII (estr.begin (), estr.end ()) || estr.length () > 1024)
                 throw (estr + uErrorBadValue);
             val.append (CharConst ("+")).append (estr).append (1, 0);
         }
@@ -156,7 +157,7 @@ MNode*  ml_dbtcpserver (MNode* cell, MlEnv* mlenv) {
  
      if (name.size () == 0)
         throw (uErrorFilenameEmpty);
-    if (! checkName (name))
+    if (! matchName (name))
         throw (name + uErrorBadName);
      if (mlenv->env) {
         SigSafe  sig;
diff --git a/lib/app.cc b/lib/app.cc

index fe56c44..046ead7 100644 (file)
--- a/lib/app.cc
+++ b/lib/app.cc
@@ -61,51 +61,51 @@ void  AppEnv::readOption (int argc, char** argv, MotorEnv* env) {
         p = argv[i];
         if (cmp (p, "datastore:")) {
             datastore = ustring (p);
-           if (! checkName (datastore))
+           if (! matchName (datastore))
                 throw (datastore + uErrorBadDatastore);
         } else if (cmp (p, "get-html:")) {
             getHtml = ustring (p);
-           if (getHtml != uDash && ! checkResourceName (getHtml))
+           if (getHtml != uDash && ! matchResourceName (getHtml))
                 throw (getHtml + uErrorBadFile);
         } else if (cmp (p, "post-html:")) {
             postHtml = ustring (p);
-           if (postHtml != uDash && ! checkResourceName (postHtml))
+           if (postHtml != uDash && ! matchResourceName (postHtml))
                 throw (postHtml + uErrorBadFile);
         } else if (cmp (p, "post-file-html:")) {
             postFileHtml = ustring (p);
-           if (postFileHtml != uDash && ! checkResourceName (postFileHtml))
+           if (postFileHtml != uDash && ! matchResourceName (postFileHtml))
                 throw (postFileHtml + uErrorBadFile);
         } else if (cmp (p, "html:")) {
             postFileHtml = postHtml = getHtml = ustring (p);
-           if (getHtml != uDash && ! checkResourceName (getHtml))
+           if (getHtml != uDash && ! matchResourceName (getHtml))
                 throw (getHtml + uErrorBadFile);
         } else if (cmp (p, "error-html:")) {
             errorHtml = ustring (p);
-           if (errorHtml != uDash && ! checkResourceName (errorHtml))
+           if (errorHtml != uDash && ! matchResourceName (errorHtml))
                 throw (errorHtml + uErrorBadFile);
         } else if (cmp (p, "get-ml:")) {
             getML = ustring (p);
-           if (getML != uDash && ! checkResourceName (getML))
+           if (getML != uDash && ! matchResourceName (getML))
                 throw (getML + uErrorBadFile);
         } else if (cmp (p, "post-ml:")) {
             postML = ustring (p);
-           if (postML != uDash && ! checkResourceName (postML))
+           if (postML != uDash && ! matchResourceName (postML))
                 throw (postML + uErrorBadFile);
         } else if (cmp (p, "post-file-ml:")) {
             postFileML = ustring (p);
-           if (postFileML != uDash && ! checkResourceName (postFileML))
+           if (postFileML != uDash && ! matchResourceName (postFileML))
                 throw (postFileML + uErrorBadFile);
         } else if (cmp (p, "ml:")) {
             postML = getML = ustring (p);
-           if (getML != uDash && ! checkResourceName (getML))
+           if (getML != uDash && ! matchResourceName (getML))
                 throw (getML + uErrorBadFile);
         } else if (cmp (p, "type:")) {
             mimetype = ustring (p);
-           if (! checkMimeType (mimetype))
+           if (! matchMimeType (mimetype))
                 throw (mimetype + ": bad mime type.");
         } else if (cmp (p, "to-code:")) {
             ocode = ustring (p);
-           if (! checkName (ocode))
+           if (! matchName (ocode))
                 throw (ocode + ": bad encoding name.");
         } else if (cmp (p, "post-limit:")) {
             int  num = boost::lexical_cast <int> (p);
diff --git a/lib/expr.cc b/lib/expr.cc

index e6ed86f..d753fad 100644 (file)
--- a/lib/expr.cc
+++ b/lib/expr.cc
@@ -77,7 +77,7 @@ MNode*  eval (MNode* cell, MlEnv* mlenv) {
             && (cell->cdr () == NULL || cell->cdr ()->isCons ())) {
             return callFunc (cell, mlenv);
         } else {
-           throw (cell->dump_string_short () + ustring (": error"));
+           throw (cell->dump_string_short () + ustring (CharConst (": error")));
         }
         break;
      case MNode::MC_STR:
@@ -221,7 +221,7 @@ bool  eval_bool (MNode* cell, MlEnv* mlenv) {
  
  ustring  eval_file (MNode* cell, MlEnv* mlenv) {
      ustring  ans = eval_str (cell, mlenv);
-    if (! checkFilename (ans)) // XXX dummy
+    if (! matchFilename (ans)) // XXX dummy
         ans.resize (0);
      return ans;
  }
diff --git a/lib/form.cc b/lib/form.cc

index f8f3854..f49b121 100644 (file)
--- a/lib/form.cc
+++ b/lib/form.cc
@@ -176,7 +176,7 @@ int  CGIForm::insert (map_t& mp, const ustring& name, const ustring& value) {
      int  ans = -1;
  
  #ifdef STRICT_FORMVAR
-    if (checkName (name))
+    if (matchName (name))
  #else
      if (name.length () > 0 && name.length () < 64)
  #endif
diff --git a/lib/formfile.cc b/lib/formfile.cc

index 441a724..8c782d7 100644 (file)
--- a/lib/formfile.cc
+++ b/lib/formfile.cc
@@ -22,6 +22,19 @@ int  CGIFormFile::partAt (int i) {
      }
  }
  
+// Returns the string stored in typemap under key i.
+// Yields uEmpty when i is negative or when typemap has no entry for i.
+ustring  CGIFormFile::typeAt (int i) {
+    if (i < 0)
+       return uEmpty;
+    tary_t::iterator  pos = typemap.find (i);
+    if (pos != typemap.end ())
+       return pos->second;
+    return uEmpty;
+}
+
  void  CGIFormFile::read_multipart (MotorEnv* env) {
  #ifdef DEBUG2
      std::cerr << "boundary:" << boundary << "\n";
@@ -123,8 +136,10 @@ void  CGIFormFile::searchPart (MotorEnv* env) {
                 fix (name);
                 parts.push_back (part (b, x));
                 fix (filename);
+               fix (type);
                 k1 = insert (iarg, name, filePart_osSafe (filename));
                 datamap.insert (sary_t::value_type (k1, k2));
+               typemap.insert (tary_t::value_type (k2, type));
  #ifdef DEBUG2
                 std::cerr << "insert(" << k1 << "," << k2 << ")\n";
  #endif /* DEBUG */
@@ -180,10 +195,72 @@ void  CGIFormFile::compileReg () {
      reN.assign (a);
  }
  
+// Walks the character range [_begin, _end) one line at a time.
+// A line terminator is "\n", "\r\n", or a lone "\r" (see findNL).
+class  ChSplitterNL {
+ public:
+    char*  b;          // start of the current segment
+    char*  t;          // start of the line terminator (end of the segment text)
+    char*  u;          // one past the line terminator
+    char*  e;          // end of the whole range
+
+    ChSplitterNL (char* _begin, char* _end): b (_begin), t (_begin), u (_begin), e (_end) {}
+    ~ChSplitterNL () {}
+
+    // True when the current segment starts at the end of the range.
+    bool  isEnd () {
+       return b == e;
+    }
+    // Text of the current segment, without its terminator.
+    ustring  pre () {
+       return ustring (b, t);
+    }
+    // Moves to the next segment. Returns false only when nothing is left;
+    // a final segment with no terminator still counts and runs up to e.
+    bool  next () {
+       b = t = u;
+       if (b >= e)
+           return false;
+       if (! findNL ())
+           t = u = e;
+       return true;
+    }
+    // Moves to the next segment and returns true only when a terminator
+    // was found. On false, [b, t) is whatever remained up to e.
+    bool  nextSep () {
+       b = t = u;
+       if (b < e && findNL ())
+           return true;
+       t = u = e;
+       return false;
+    }
+    // Scans forward from t for a terminator. On success t points at the
+    // terminator and u just past it; on failure t is left at e.
+    bool  findNL () {
+       while (t < e) {
+           char  c = *t;
+           if (c == '\n' || c == '\r') {
+               u = t + 1;
+               // fold "\r\n" into a single terminator
+               if (c == '\r' && u < e && *u == '\n')
+                   ++ u;
+               return true;
+           }
+           ++ t;
+       }
+       return false;
+    }
+};
+
  void  CGIFormFile::readMimeHead (char*& b, char* e, ustring& disp, ustring& name, ustring& filename, ustring& type) {
-    boost::match_results<char*>  m;
+//    boost::match_results<char*>  m;
+    ChSplitterNL  sp (b, e);
      boost::match_results<char*>  m2;
-    char*  x;
+//    char*  x;
      static uregex  re_disp1 ("^Content-Disposition:\\s*(.*);\\s*name=\"(.*)\";\\s*filename=\"(.*)\"$");
      static uregex  re_disp2 ("^Content-Disposition:\\s*(.*);\\s*name=\"(.*)\"$");
      static uregex  re_type ("^Content-Type:\\s*([a-zA-Z_0-9/.+-]*)(;\\s*(.*))?$");
@@ -192,42 +269,40 @@ void  CGIFormFile::readMimeHead (char*& b, char* e, ustring& disp, ustring& name
      name.resize (0);
      filename.resize (0);
      type.resize (0);
-    while (b != e && regex_search (b, e, m, re_nl, boost::regex_constants::match_single_line)) {
-       x = m[0].first;
+//    while (b != e && regex_search (b, e, m, re_nl, boost::regex_constants::match_single_line)) {
+//     x = m[0].first;
+    while (sp.next ()) {
  #ifdef DEBUG2
-       std::cerr << "line:" << ustring (b, x) << "\n";
+//     std::cerr << "line:" << ustring (b, x) << "\n";
+       std::cerr << "line:" << sp.pre () << "\n";
  #endif /* DEBUG */
-       if (b == x) {           // empty line
-           b = m[0].second;
+//     if (b == x) {           // empty line
+//         b = m[0].second;
+       if (sp.b == sp.t) {
+           b = sp.u;
             break;
         }
-       if (regex_search (b, x, m2, re_disp1, boost::regex_constants::match_single_line)) {
+//     if (regex_search (b, x, m2, re_disp1, boost::regex_constants::match_single_line)) {
+       if (regex_search (sp.b, sp.t, m2, re_disp1, boost::regex_constants::match_single_line)) {
             disp.assign (m2[1].first, m2[1].second - m2[1].first);
             name.assign (m2[2].first, m2[2].second - m2[2].first);
             filename.assign (m2[3].first, m2[3].second - m2[3].first);
-       } else if (regex_search (b, x, m2, re_disp2, boost::regex_constants::match_single_line)) {
+//     } else if (regex_search (b, x, m2, re_disp2, boost::regex_constants::match_single_line)) {
+       } else if (regex_search (sp.b, sp.t, m2, re_disp2, boost::regex_constants::match_single_line)) {
             disp.assign (m2[1].first, m2[1].second - m2[1].first);
             name.assign (m2[2].first, m2[2].second - m2[2].first);
-       } else if (regex_search (b, x, m2, re_type, boost::regex_constants::match_single_line)) {
+//     } else if (regex_search (b, x, m2, re_type, boost::regex_constants::match_single_line)) {
+       } else if (regex_search (sp.b, sp.t, m2, re_type, boost::regex_constants::match_single_line)) {
             type.assign (m2[1].first, m2[1].second - m2[1].first);
         } else {
  #ifdef DEBUG2
-           std::cerr << "not match:" << ustring (b, x) << "\n";
+//         std::cerr << "not match:" << ustring (b, x) << "\n";
+           std::cerr << "not match:" << sp.pre () << "\n";
  #endif /* DEBUG */
         }
-       b = m[0].second;
-    }
-}
-
-#if 0
-bool  CGIFormFile::readFilename (int i, ustring& filename) {
-    if (0 <= i && i < parts.size ()) {
-       filename = filenames[i];
-       return true;
+//     b = m[0].second;
      }
-    return false;
  }
-#endif
  
  bool  CGIFormFile::saveFile (int i, const ustring& path, size_t max) {
      static size_t  bsize = 65536;
diff --git a/lib/formfile.h b/lib/formfile.h

index 48f3bc7..2e9f58b 100644 (file)
--- a/lib/formfile.h
+++ b/lib/formfile.h
@@ -11,6 +11,7 @@ class  CGIFormFile: public CGIForm {
   public:
      typedef std::pair<char*,char*>  part;
      typedef boost::unordered_map<int,int>  sary_t;
+    typedef boost::unordered_map<int,ustring>  tary_t;
  
      ustring  tmpfile;
      uregex  re1;
@@ -19,6 +20,7 @@ class  CGIFormFile: public CGIForm {
      char*  mapdata;
      size_t  mapsize;
      sary_t  datamap;
+    tary_t  typemap;
      std::vector<part>  parts;
  
      CGIFormFile () {
@@ -29,6 +31,7 @@ class  CGIFormFile: public CGIForm {
      };
  
      virtual int  partAt (int i);
+    virtual ustring  typeAt (int i);
      virtual void  read_multipart (MotorEnv* env);
      virtual bool  saveData (MotorEnv* env);
      virtual void  unlinkTmpFile ();
diff --git a/lib/http.cc b/lib/http.cc

index 08d210f..d380d3d 100644 (file)
--- a/lib/http.cc
+++ b/lib/http.cc
@@ -46,8 +46,6 @@ void  HTTPResponse::setCookie (const ustring& key, const ustring& val, const ust
      ustring  ck;
      ustring  u;
      size_t  len;
-    umatch  m;
-    static uregex  re ("//|/\\.|\\.\\.|[\\x00-\\x20\\x7f-\\xff]");
      
      if (key.size () <= 128 && val.size () <= 512) {
         ck = cookieencode (key);
@@ -67,7 +65,7 @@ void  HTTPResponse::setCookie (const ustring& key, const ustring& val, const ust
             ck.append (CharConst ("; expires="));
             ck.append (dateCookie (limit));
         }
-       if (domain.size () > 0 && checkDomain_dot (domain)) { // ???
+       if (domain.size () > 0 && matchDomain_dot (domain)) { // ???
             ck.append (CharConst ("; domain="));
             ck.append (domain);
         }
@@ -79,7 +77,7 @@ void  HTTPResponse::setCookie (const ustring& key, const ustring& val, const ust
      }
  }
  
-void  HTTPResponse::setCookiePair (uiterator& b, const uiterator& e) {
+void  HTTPResponse::setCookiePair (uiterator b, const uiterator e) {
      uiterator  m;
      ustring  key, val;
  
@@ -97,21 +95,13 @@ void  HTTPResponse::setCookiePair (uiterator& b, const uiterator& e) {
  }
  
  void  HTTPResponse::parseCookie () {
-    uiterator  b, e;
-    umatch  m;
-    static uregex  re (" *; *");
-
      cookieDone = true;
      cookie = getenvString (kHTTP_COOKIE);
-    b = cookie.begin ();
-    e = cookie.end ();
-    while (usearch (b, e, m, re)) {
-       if (b != m[0].first)
-           setCookiePair (b, m[0].first);
-       b = m[0].second;
+    SplitterFn  sp (cookie, findSepColon);
+    while (sp.next ()) {
+       if (sp.preSize () > 0)
+           setCookiePair (sp.begin (), sp.end ());
      }
-    if (b != e)
-       setCookiePair (b, e);
  }
  
  ustring  HTTPResponse::readCookie (const ustring& key) {
@@ -193,22 +183,19 @@ void  HTTPResponse::standardResponse_html (MotorOutput* out, MotorEnv* env) {
  
  void  HTTPResponse::disposition (MotorOutput* out, bool finline, const ustring& name) {
      ustring  n2;
-    Splitter  sp (name, re_q);
-
-    if (sp.next ()) {
-       if (sp.match (0)) {
-           n2.reserve (name.length ());
-           n2.append (sp.begin (), sp.end ());
+    SplitterCh sp (name, '\"');
+
+    if (sp.nextSep ()) {
+       n2.reserve (name.length ());
+       n2.append (sp.pre ());
+       n2.append (uUScore);
+       while (sp.nextSep ()) {
+           n2.append (sp.pre ());
             n2.append (uUScore);
-           while (sp.next ()) {
-               n2.append (sp.begin (), sp.end ());
-               if (sp.match (0)) {
-                   n2.append (uUScore);
-               }
-           }
-       } else {
-           n2 = name;
         }
+       n2.append (sp.pre ());
+    } else {
+       n2 = name;
      }
      if (finline) {
         out->out_raw (CharConst (kRES_DISP ": " kINLINE));
@@ -291,12 +278,20 @@ void  HTTPResponse::forbiddenResponse (MotorOutput* out, MotorEnv* env) {
  }
  
  void  HTTPResponse::setHeader (const ustring& key, const ustring& val) {
-    umatch  m;
-    static uregex  re ("^[a-zA-Z_0-9-]+$");
-
-    if (!usearch (key, m, re))
+    static char  table_httpheader[] = {                // [a-zA-Z_0-9-]
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 
+       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 
+       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
+    };
+
+    if (! matchWordTbl (key.begin (), key.end (), table_httpheader))
          throw (key + ": bad header name.");
-    if (!checkASCII (val))
+    if (! matchASCII (val.begin (), val.end ()))
          throw (val + ": bad header value.");
      moreheader.push_back (std::pair<ustring,ustring> (key, val));
  }
diff --git a/lib/http.h b/lib/http.h

index 4695910..c4cf8c7 100644 (file)
--- a/lib/http.h
+++ b/lib/http.h
@@ -33,7 +33,7 @@ class  HTTPResponse {
      virtual void  printNoCache (MotorOutput* out);
      virtual void  printCookie (MotorOutput* out, MotorEnv* env);
      virtual void  setCookie (const ustring& key, const ustring& val, const ustring& path, time_t span, time_t limit, const ustring& domain, bool fsecure, MotorEnv* env);
-    virtual void  setCookiePair (uiterator& b, const uiterator& e);
+    virtual void  setCookiePair (uiterator b, const uiterator e);
      virtual void  parseCookie ();
      virtual ustring  readCookie (const ustring& key);
      virtual void  setRandomCookie (MotorEnv* env);
diff --git a/lib/ml.cc b/lib/ml.cc

index 69741a2..16d44b9 100644 (file)
--- a/lib/ml.cc
+++ b/lib/ml.cc
@@ -11,6 +11,126 @@
  #include <exception>
  #include <assert.h>
  #include <stdlib.h>
+#include <ctype.h>
+
+// Scans [b, e) for the first backslash or control character (0x00-0x1f).
+// Returns true with b positioned on that character, or false once the
+// scan has reached e. A space (0x20) is not a stop character.
+static bool  findSymSp (uiterator& b, uiterator e) {
+    int  c;
+    for (; b < e; ++ b) {
+       c = *b;
+       // The 0 <= c guard keeps bytes >= 0x80 (negative where char is
+       // signed) from being mistaken for control characters.
+       if (c == '\\' || (0 <= c && c < ' '))
+           return true;
+    }
+    return false;
+}
+
+// Matches exactly three octal digits ([0-7]) starting at b.
+// On success b is advanced past the three digits and true is returned;
+// otherwise b is left untouched and false is returned.
+static bool  matchSymOct (uiterator& b, uiterator e) {
+    // Fewer than three characters left can never match. Without this
+    // check an empty or short tail would be reported as a match.
+    if (e - b < 3)
+       return false;
+    uiterator  p = b;
+    for (int  n = 0; n < 3; ++ p, ++ n) {
+       int  c = *p;
+       if (c < '0' || '7' < c)
+           return false;
+    }
+    b = p;
+    return true;
+}
+
+// Tries to read a number at b of the shape
+//     [+-] digits [ '.' digits [ (e|E) [+-] digits ] ]
+// Returns true and advances b past the consumed text when at least one
+// mantissa digit was read. Otherwise returns false and leaves b alone.
+// It also returns false when the input ends right after the exponent
+// marker or right after the exponent sign.
+// NOTE(review): an exponent marker (and sign) followed by a non-digit is
+// still consumed, e.g. the "e+" of "1.5e+x" -- confirm this is intended.
+static bool  matchRealNum (uiterator&b, uiterator e) {
+    uiterator  p = b;
+    int  c;
+    bool  f = false;           // set once a mantissa digit has been read
+    if (p < e) {
+       c = *p;
+       if (c == '+' || c == '-')
+           ++ p;
+       // integer part, ended by '.'
+       while (1) {
+           if (p == e) {
+               goto Ep1;
+           // isdigit() is undefined for negative values, so the byte is
+           // passed as unsigned char.
+           } else if (isdigit (static_cast<unsigned char> (c = *p))) {
+               ++ p;
+               f = true;
+           } else if (c == '.') {
+               ++ p;
+               break;
+           } else {
+               goto Ep1;
+           }
+       }
+       // fraction part, ended by 'e' or 'E'
+       while (1) {
+           if (p == e) {
+               goto Ep1;
+           } else if (isdigit (static_cast<unsigned char> (c = *p))) {
+               ++ p;
+               f = true;
+           } else if (c == 'e' || c == 'E') {
+               ++ p;
+               break;
+           } else {
+               goto Ep1;
+           }
+       }
+       // exponent: optional sign, then digits
+       if (p == e)
+           return false;
+       c = *p;
+       if (c == '+' || c == '-')
+           ++ p;
+       if (p == e)
+           return false;
+       while (1) {
+           if (p == e) {
+               goto Ep1;
+           } else if (isdigit (static_cast<unsigned char> (c = *p))) {
+               ++ p;
+           } else {
+               break;
+           }
+       }
+    }
+ Ep1:
+    if (f) {
+       b = p;
+       return true;
+    } else {
+       return false;
+    }
+}
+
+// Reports whether c may appear in a symbol.
+// Values outside 0..127 are always accepted. Within ASCII, everything is
+// accepted except 0x00-0x20, 0x7f and the characters " ' ( ) ; [ ] { }.
+static bool  matchSymbol_c (int c) {
+    if (c < 0 || 128 <= c)
+       return true;
+    if (c <= ' ' || c == 0x7f)
+       return false;
+    switch (c) {
+    case '"': case '\'':
+    case '(': case ')':
+    case ';':
+    case '[': case ']':
+    case '{': case '}':
+       return false;
+    default:
+       return true;
+    }
+}
+// Applies matchSymbol_c to the text at b through matchHeadFn.
+// Callers in this file save b beforehand and treat [saved, b) as the
+// matched symbol after a true return.
+// NOTE(review): matchHeadFn is defined elsewhere; presumably it consumes
+// the leading run of accepted characters -- confirm.
+inline bool  matchSymbol (uiterator&b, uiterator e) {
+    return matchHeadFn (b, e, matchSymbol_c);
+}
+
+// Reports whether c is an ASCII character that has to be escaped when a
+// symbol is printed: 0x00-0x20, 0x7f and " ' ( ) ; [ ] { } plus the
+// backslash itself. Values outside 0..127 are never reported.
+static bool  findNonSymbol_c (int c) {
+    if (c < 0 || c >= 128)
+       return false;
+    if (c <= ' ' || c == 0x7f)
+       return true;
+    switch (c) {
+    case '"': case '\'':
+    case '(': case ')':
+    case ';':
+    case '[': case '\\': case ']':
+    case '{': case '}':
+       return true;
+    default:
+       return false;
+    }
+}
+// Searches [b, e) with findNonSymbol_c through findCharFn.
+// Callers in this file dereference b after a true return, i.e. b is
+// expected to sit on the character that was found.
+// NOTE(review): findCharFn is defined elsewhere -- confirm where it leaves
+// b when nothing is found.
+inline bool  findNonSymbol (uiterator&b, uiterator e) { // the backslash counts as a hit
+    return findCharFn (b, e, findNonSymbol_c);
+}
  
  void  MNode::fdelete () {
  #ifdef DEBUG3
@@ -185,15 +305,19 @@ ustring  MNode::sym_to_string () {
      assert (type == MC_SYM);
      uiterator  b = sym->begin ();
      uiterator  e = sym->end ();
-    umatch  m;
-    if (b < e && usearch (b, e, m, re_nonsymbolchar)) {
+    uiterator  p;
+    p = b;
+    if (findNonSymbol (b, e)) {
         ustring  ans;
         do {
-           ans.append (b, m[0].first).append (1, '\\').append (octchar (*m[0].first));
-           b = m[0].second;
-       } while (b < e && usearch (b, e, m, re_nonsymbolchar));
-       if (b < e)
-           ans.append (b, e);
+           if (p < b)
+               ans.append (p, b);
+           ans.append (1, '\\').append (octchar (*b));
+           ++ b;
+           p = b;
+       } while (b < e && findNonSymbol (b, e));
+       if (p < e)
+           ans.append (p, e);
         return ans;
      } else {
         return *sym;
@@ -340,33 +464,36 @@ MNode*  quoted (MNode* v) {
  }
  
  MNode*  newMNode_sym (ustring* v) {
-    static uregex  re_special ("[\\x00-\x1f\\\\]");
-    static uregex  re_oct ("^[0-7][0-7][0-7]");
      MNode*  ans = new MNode;
      uiterator  b = v->begin ();
      uiterator  e = v->end ();
-    umatch  m;
+    uiterator  p;
+    int  c;
  
-    if (b < e && usearch (b, e, m, re_special)) {
+    p = b;
+    if (findSymSp (b, e)) {
         ustring*  w = new ustring;
         do {
-           w->append (b, m[0].first);
-           b = m[0].second;
-           if (*(m[0].first) == '\\') {
-               if (b < e && usearch (b, e, m, re_oct)) {
-                   int  c = octchar (b);
+           if (p < b)
+               w->append (p, b);
+           c = *b;
+           ++ b;
+           if (c == '\\') {
+               p = b;
+               if (matchSymOct (b, e)) {
+                   c = octchar (p);
                     if (32 <= c && c < 127)
                         w->append (1, c);
-                   b = m[0].second;
                 } else {
                     w->append (1, '\\');
                 }
             } else {
                 // skip;
             }
-       } while (b < e && usearch (b, e, m, re_special));
-       if (b < e)
-           w->append (b, e);
+           p = b;
+       } while (findSymSp (b, e));
+       if (p < e)
+           w->append (p, e);
         ans->set_sym (w);
         delete v;
      } else {
@@ -581,7 +708,7 @@ ustring  formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par)
      umatch  m;
      u_int  i;
      MNode*  a;
-    static uregex  re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
+    uregex  re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}");
      static struct {
         const char* name;
         size_t  namelen;
@@ -618,7 +745,7 @@ ustring  formatString (const ustring& format, boost::ptr_vector<MNodePtr>& par)
             std::vector<ustring>  fpar;
             int  i;
             if (m[4].matched)
-               split (m[5].first, m[5].second, re_colon, fpar);
+               split (m[5].first, m[5].second, ':', fpar);
             for (i = 0; formatFunc[i].name; i ++) {
                 if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) {
                     (*formatFunc[i].fn) (ans, a, fpar);
@@ -801,7 +928,7 @@ void  MotorTexp::scan (const ustring& text, bool skip) {
      }
      while (b != e) {
         if (skip) {
-           skipWhite (linenum, b, e);
+           skipBlank (linenum, b, e);
             if (*b == '<') {
  #ifdef DEBUG
  //             std::cerr << "---:" << ustring (b, b + 20) << "\n";
@@ -819,22 +946,18 @@ void  MotorTexp::scan (const ustring& text, bool skip) {
  }
  
  void  MotorTexp::skipHead (uiterator& b, uiterator& e, int& linenum) {
-    umatch  m;
-    while (b != e && (*b != '(' && *b != ';')) {
-       if (usearch (b, e, m, re_nl)) {
-           b = m[0].second;
+    while (b < e && (*b != '(' && *b != ';')) {
+       if (findNLb (b, e)) {
             linenum ++;
-       } else {
-           b = e;
         }
      }
  }
  
  void  MotorTexp::scanWord (int& linenum, uiterator& b, uiterator& e, word_type& type, ustring*& ans) {
      ustring::value_type  c;
-    umatch  m;
+    uiterator  p;
   Ep1:;
-    skipWhite (linenum, b, e);
+    skipBlank (linenum, b, e);
      if (b != e) {
         c = *b;
         switch (c) {
@@ -858,13 +981,11 @@ void  MotorTexp::scanWord (int& linenum, uiterator& b, uiterator& e, word_type&
             break;
         case ';':
             b ++;
-           if (usearch (b, e, m, re_nl)) {
-               b = m[0].second;
+           if (findNLb (b, e)) {
                 linenum ++;
                 goto Ep1;
             } else {
                 // end of comment but without nl.
-               b = e;
                 type = YYNONE;
             }
             break;
@@ -885,22 +1006,20 @@ void  MotorTexp::scanWord (int& linenum, uiterator& b, uiterator& e, word_type&
             type = YYREN3;
             break;
         default:
-           if (usearch (b, e, m, re_realnumber)) {
+           p = b;
+           if (matchRealNum (b, e)) {
                 type = YYNUM;
-               ans = new ustring (m[0]);
-               b = m[0].second;
-           } else if (usearch (b, e, m, re_symbol)) {
-               assert (b == m[0].first);
-               if (*b == '.' && m[0].second - m[0].first == 1) {
+               ans = new ustring (p, b);
+           } else if (matchSymbol (b, e)) {
+               if (*p == '.' && b - p == 1) {
                     type = YYPERIOD;
-                   b = m[0].second;
                 } else {
                     type = YYSIM;
-                   ans = new ustring (m[0]);
-                   b = m[0].second;
+                   ans = new ustring (p, b);
                 }
-               skipWhite (linenum, b, e);
+               skipBlank (linenum, b, e);
             } else {
+//             std::cerr << "error\n";
                 assert (0);
             }
         }
@@ -911,11 +1030,9 @@ void  MotorTexp::scanWord (int& linenum, uiterator& b, uiterator& e, word_type&
  
  ustring*  MotorTexp::scanText (int& linenum, uiterator& b, uiterator& e) {
      ustring::value_type  c;
-    uiterator  p = b;
+    uiterator  p;
      ustring*  ans = new ustring;
-    umatch  m;
      uint32_t  v;
-    static uregex  re_hex4 ("^[0-9a-fA-F]{4}");
  
      ans->reserve (128);
      while (b != e) {
@@ -945,14 +1062,12 @@ ustring*  MotorTexp::scanText (int& linenum, uiterator& b, uiterator& e) {
                     linenum ++;
                     break;
                 default:
-                   if (e - b >= 4 && usearch (b, e, m, re_hex4)) {
-                       v = hextoul (m[0].first, m[0].second);
-                       if (v >= 32) {
+                   p = b;
+                   if (e - b >= 4 && matchHex4 (b, e)) {
+                       v = hextoul (p, b);
+                       if (v == '\t' || v == '\n' || v == '\r' || v >= 32) {
                             std::wstring  w (1, v);
                             ans->append (wtou (w));
-                           b += 4;
-                       } else {
-                           goto bp1;
                         }
                         break;
                     }
@@ -973,14 +1088,22 @@ ustring*  MotorTexp::scanText (int& linenum, uiterator& b, uiterator& e) {
      return ans;
  }
  
-void  MotorTexp::skipWhite (int& linenum, uiterator& b, uiterator& e) {
+void  MotorTexp::skipBlank (int& linenum, uiterator& b, uiterator& e) {
      ustring::value_type  c;
  
-    for (; b != e; b ++) {     // hack
+    while (b < e) {    // hack
         c = *b;
         if (0 <= c && c <= ' ') {
-           if (c == '\n') {
-               linenum ++;
+           if (c == '\r') {
+               ++ linenum;
+               ++ b;
+               if (b < e && *b == '\n')
+                   ++ b;
+           } else if (c == '\n') {
+               ++ linenum;
+               ++ b;
+           } else {
+               ++ b;
             }
         } else {
             break;
@@ -1008,6 +1131,8 @@ void  MotorTexp::scanQuote (int& linenum, uiterator& b, uiterator& e, MNode* cel
      if (b != e) {
         if (scanTexp (linenum, b, e, cell, false, YYNONE))
             throw (uErrorSyntax);       // ')
+    } else {
+       throw (uErrorSyntax);
      }
  }
  
@@ -1179,6 +1304,23 @@ bool  MotorTexp::scanTexp (int& linenum, uiterator& b, uiterator& e, MNode*& cel
      return false;
  }
  
+// Accept exactly four hex digits starting at b; on success advance b past them.
+bool  MotorTexp::matchHex4 (uiterator& b, uiterator e) {
+    uiterator  p = b;
+    int  n = 0;
+    for (; p < e && n < 4; ++ p, ++ n) {
+       int  c = *p;            // read the cursor p, not b, so all four positions are checked
+       if (! (('0' <= c && c <= '9')
+              || ('a' <= c && c <= 'f')
+              || ('A' <= c && c <= 'F')))
+           return false;       // non-hex character: b is left unchanged
+    }
+    if (n < 4)                 // range ended before four digits were seen
+       return false;
+    b = p;
+    return true;
+}
+
  void  nextNode (MNode*& arg) {
      if (arg) {
         switch (arg->type) {
diff --git a/lib/ml.h b/lib/ml.h

index 86ed5a4..e7651b6 100644 (file)
--- a/lib/ml.h
+++ b/lib/ml.h
@@ -459,12 +459,13 @@ class  MotorTexp {
      virtual void  skipHead (uiterator& b, uiterator& e, int& linenum);
      virtual void  scanWord (int& linenum, uiterator& b, uiterator& e, word_type& type, ustring*& ans);
      virtual ustring*  scanText (int& linenum, uiterator& b, uiterator& e);
-    virtual void  skipWhite (int& linenum, uiterator& b, uiterator& e);
+    virtual void  skipBlank (int& linenum, uiterator& b, uiterator& e);
      virtual bool  scanCar (int& linenum, uiterator& b, uiterator& e, MNode* cell);
      virtual void  scanQuote (int& linenum, uiterator& b, uiterator& e, MNode* cell);
      virtual void  scanVector (int& linenum, uiterator& b, uiterator& e, MNode* cell);
      virtual void  scanTable (int& linenum, uiterator& b, uiterator& e, MNode* cell);
      virtual bool  scanTexp (int& linenum, uiterator& b, uiterator& e, MNode*& cell, bool qcdr, word_type closing);
+    virtual bool  matchHex4 (uiterator& b, uiterator e);
  };
  
  class  MLFunc {
diff --git a/lib/motor.cc b/lib/motor.cc

index 69ee2d4..d3467d2 100644 (file)
--- a/lib/motor.cc
+++ b/lib/motor.cc
@@ -4,6 +4,7 @@
  #include "mlenv.h"
  #include "expr.h"
  #include "util_const.h"
+#include "util_check.h"
  #include "util_file.h"
  #include "util_splitter.h"
  #include "util_string.h"
@@ -303,13 +304,8 @@ void  HTMLMotor::compile (const ustring& text, bool skipHead) {
      begin = text.begin ();
      end = text.end ();
      if (skipHead) {
-       umatch  m;
         while (begin != end && *begin != '<') {
-           if (usearch (begin, end, m, re_nl)) {
-               begin = m[0].second;
-           } else {
-               begin = end;
-           }
+           findNLb (begin, end);
         }
      }
      s1 (&objs, term_none, NULL);
@@ -554,7 +550,7 @@ int  HTMLMotor::s3 (MotorObj::MotorObjVec* sobjs, uiterator start, bool fbang, u
  
  int  HTMLMotor::s4 (MotorObj::MotorObjVec* sobjs, uiterator start, upair name) { // [[NAME:
      // start points after the colon.
-    umatch  m;
+//    umatch  m;
  
  #ifdef DEBUG2
      cerr << "s4 ()  start:" << ustring (start, start + 8) << "...  name:" << ustring (name.first, name.second) << "\n";
@@ -563,7 +559,7 @@ int  HTMLMotor::s4 (MotorObj::MotorObjVec* sobjs, uiterator start, upair name) {
         if (! s5 (sobjs, start)) {
             return 0;                   // NG
         }
-    } else if (usearch (name.first, name.second, m, re_digits)) {// [[NUM:
+    } else if (matchNum (name.first, name.second)) {// [[NUM:
         AutoDelete<MotorObjTempl>  o;
         uiterator  b = begin;
  
@@ -574,10 +570,8 @@ int  HTMLMotor::s4 (MotorObj::MotorObjVec* sobjs, uiterator start, upair name) {
             return 0;                   // NG
         }
         sobjs->push_back (o.release ());
-    } else {                           // [[FUNC:
-       if (! s8 (sobjs, name, start)) {
-           return 0;
-       }
+    } else if (! s8 (sobjs, name, start)) {    // [[FUNC:
+       return 0;
      }
      return 1;
  }
@@ -646,8 +640,6 @@ int  HTMLMotor::s7 (MotorObj::MotorObjVec* sobjs) {
  
  int  HTMLMotor::s8 (MotorObj::MotorObjVec* sobjs, upair name, uiterator start) { // [[FUNC:
      ustring  s;
-//    static uregex  re_colon_bra ("(:)|(\\]\\]" cECOM ")|&(.);");
-//    Splitter  sp (start, end, re_colon_bra);
      MFTable::iterator  it;
      MFTable2::iterator  it2;
  
@@ -675,44 +667,11 @@ int  HTMLMotor::s8 (MotorObj::MotorObjVec* sobjs, upair name, uiterator start) {
         sobjs->push_back (o.release ());
      }
      return 1;                  // OK
-
-#if 0
-#ifdef DEBUG2
-    std::cerr << "s8  start:" << *start << "\n";
-#endif /* DEBUG */
-    while (sp.next ()) {
-       s.append (sp.begin (), sp.end ());
-       if (sp.match (1)) {             // :
-           o ()->args.push_back (s);
-           s.resize (0);
-       } else if (sp.match (2)) {      // ]]
-           o ()->args.push_back (s);
-           s.resize (0);
-           begin = sp.matchEnd ();
-           goto Ex1;
-       } else if (sp.match (3)) {      // &x;
-           s.append (sp.matchBegin (3), sp.matchEnd (3));
-       } else if (! sp.match (0)) {
-           return 0;                   // NG
-       } else {
-           assert (0);
-       }
-    }
-#ifdef DEBUG2
-    std::cerr << "unexpected end\n";
-#endif /* DEBUG */
-    return 0;                          // NG
-    
- Ex1:;
-    sobjs->push_back (o.release ());
-
-    return 1;
-#endif
  }
  
  int  HTMLMotor::s9 (std::vector<ustring>& args, uiterator start) {
      static uregex  re_colon_bra ("(:)|(\\]\\]" cECOM ")|&(.);");
-    Splitter  sp (start, end, re_colon_bra);
+    SplitterRe  sp (start, end, re_colon_bra);
      ustring  s;
  
      while (sp.next ()) {
@@ -738,7 +697,7 @@ int  HTMLMotor::s9 (std::vector<ustring>& args, uiterator start) {
  
  int  HTMLMotor::s10 (std::vector<ustring>& args, MotorObj::MotorObjVec& arg2, uiterator start) {
      static uregex  re_colon_spc_bra ("(:)|( )|(\\]\\]" cECOM ")|&(.);");
-    Splitter  sp (start, end, re_colon_spc_bra);
+    SplitterRe  sp (start, end, re_colon_spc_bra);
      ustring  s;
  
      while (sp.next ()) {
diff --git a/lib/motorenv.cc b/lib/motorenv.cc

index c80a4ec..e8d827e 100644 (file)
--- a/lib/motorenv.cc
+++ b/lib/motorenv.cc
@@ -113,7 +113,7 @@ bool  MotorEnv::path_resource (const ustring& name, ustring& ans) {
         if (top.length () > 0) {
             r = top + name;
             shapePath (r);
-           if (matchHead (r, top) && checkAbsoluteResourceName (r) && isPlainFile (r)) {
+           if (matchHead (r, top) && matchAbsoluteResourceName (r) && isPlainFile (r)) {
                 ans = r;
                 return true;
             } else {
@@ -143,7 +143,7 @@ bool  MotorEnv::path_resource (const ustring& name, ustring& ans) {
  #ifdef DEBUG
  //         std::cerr << "r:" << r << "\n";
  #endif /* DEBUG */
-           if (matchHead (r, top) && checkAbsoluteResourceName (r) && isPlainFile (r)) {
+           if (matchHead (r, top) && matchAbsoluteResourceName (r) && isPlainFile (r)) {
                 ans = r;
                 return true;
  #ifdef STANDALONE
@@ -156,7 +156,7 @@ bool  MotorEnv::path_resource (const ustring& name, ustring& ans) {
  #ifdef STANDALONE
             r = name;
             shapePath (r);
-           if (checkResourceName (r) && isPlainFile (r)) {
+           if (matchResourceName (r) && isPlainFile (r)) {
                 ans = r;
                 return true;
             }
@@ -172,7 +172,7 @@ bool  MotorEnv::path_resource (const ustring& name, ustring& ans) {
         if (documentRoot.length () > 0 && r.length () > 0) {
             r = documentRoot + uSlash + r + uSlash + name;
             shapePath (r);
-           if (matchHead (r, top) && checkAbsoluteResourceName (r) && isPlainFile (r)) {
+           if (matchHead (r, top) && matchAbsoluteResourceName (r) && isPlainFile (r)) {
                 ans = r;
                 return true;
             }
@@ -262,7 +262,7 @@ ustring  MotorEnv::path_storage_file (const ustring& name, const char* suffix) {
  ustring  MotorEnv::path_static_file (const ustring& name) {
      ustring  ans;
  
-    if (! checkResourceName (name))
+    if (! matchResourceName (name))
         throw (name + uErrorBadFile);
      if (path_resource (name, ans)) {
      } else {
@@ -335,7 +335,7 @@ void  MotorEnv::setDatastore (const ustring& name) {
      if (name.size () == 0) {
         datastore = appenv->datastore;
      } else {
-       if (! checkName (name))
+       if (! matchName (name))
             throw (name + uErrorBadDatastore);
         datastore = name;
      }
diff --git a/lib/motoroutput.h b/lib/motoroutput.h

index 4a2b2a6..da081ef 100644 (file)
--- a/lib/motoroutput.h
+++ b/lib/motoroutput.h
@@ -2,6 +2,7 @@
  #define MOTOROUTPUT_H
  
  #include "util_const.h"
+#include "util_regex.h"
  #include "ustring.h"
  
  class  MotorOutput {
diff --git a/lib/ustring.h b/lib/ustring.h

index ea9fd30..1d2d2d6 100644 (file)
--- a/lib/ustring.h
+++ b/lib/ustring.h
@@ -3,7 +3,7 @@
  
  #include <string>
  #include <utility>
-#include <boost/regex.hpp>
+#include <unistd.h>
  
  inline char*  char_type (u_char* v) {return (char*)v;}
  inline char*  char_type (char* v) {return v;}
@@ -18,15 +18,6 @@ inline char*  noconst_char (const char* v) {return (char*)v;}
  typedef std::basic_string<char>                ustring;
  typedef ustring::const_iterator                uiterator;
  typedef std::pair<ustring::const_iterator, ustring::const_iterator>  upair;
-typedef boost::match_results<ustring::const_iterator>  umatch;
-typedef boost::basic_regex<char, boost::regex_traits<char> >  uregex;
-
-inline bool  usearch (ustring::const_iterator first, ustring::const_iterator last, umatch& m, const uregex& re, boost::match_flag_type flags = boost::regex_constants::match_single_line) {
-    return regex_search (first, last, m, re, flags);
-}
-inline bool  usearch (const ustring& s, umatch& m, const uregex& re, boost::match_flag_type flags = boost::regex_constants::match_single_line) {
-    return regex_search (s.begin (), s.end (), m, re, flags);
-}
  
  inline int  match (upair& p, const u_char* s, ustring::size_type len) {
      ustring::size_type  n = p.second - p.first;
diff --git a/lib/utf8.cc b/lib/utf8.cc

index 12a0b6a..3ba18b0 100644 (file)
--- a/lib/utf8.cc
+++ b/lib/utf8.cc
@@ -1,5 +1,6 @@
  #include "utf8.h"
  #include "util_const.h"
+#include "util_splitter.h"
  #include "ustring.h"
  #include "cdbobj.h"
  #include <iostream>
@@ -271,28 +272,38 @@ ustring  ellipsis (const ustring& text, int limit) {
      return u;
  }
  
-ustring  logText (const ustring& text) {
-    uiterator  b = text.begin ();
-    uiterator  e = text.end ();
-    umatch  m;
-    ustring  u;
-    static uregex  re ("[\\000-\\037\\0177]");
-
-    u.reserve (256);
-    while (usearch (b, e, m, re)) {
-       if (b != m[0].first)
-           u += ustring (b, m[0].first);
-       if (*m[0].first == '\n') {
-           u += "//";
-       } else {
-           u += '_';
+static bool  findCtrlChar (uiterator& b, uiterator e, uiterator& u) {
+    int  c;
+    for (; b < e; ++ b) {
+       c = *b;
+       if ((0 <= c && c < 0x20) || c == 0x7f) {        // [\x00-\x1f\x7f]
+           u = b + 1;
+           return true;
         }
-       b = m[0].second;
      }
-    if (b != e) {
-       u += ustring (b, e);
+    u = e;
+    return false;
+}
+
+ustring  logText (const ustring& text) {
+    SplitterFn  sp (text, findCtrlChar);
+    if (sp.nextSep ()) {
+       ustring  ans;
+       do {
+           if (sp.preSize () > 0)
+               ans.append (sp.pre ());
+           if (*sp.end () == '\n') {
+               ans.append (CharConst ("//"));
+           } else {
+               ans.append (uUScore);
+           }
+       } while (sp.nextSep ());
+       if (sp.preSize () > 0)
+           ans.append (sp.pre ());
+       return ans;
+    } else {
+       return text;
      }
-    return u;
  }
  
  void  clipEnd (ustring& val, uregex& re1, uregex& re2) {
@@ -317,15 +328,15 @@ void  clipEnd (ustring& val, uregex& re1, uregex& re2) {
  }
  
  void  clipWhiteEnd (ustring& val) {
-    static uregex  re1 ("^(" UTF8_SPACE "|" UTF8_ZWSPACE "|" UTF8_IDEOSPACE ")+");
-    static uregex  re2 ("(" UTF8_SPACE "|" UTF8_ZWSPACE "|" UTF8_IDEOSPACE ")+$");
+    uregex  re1 ("^(" UTF8_SPACE "|" UTF8_ZWSPACE "|" UTF8_IDEOSPACE ")+");
+    uregex  re2 ("(" UTF8_SPACE "|" UTF8_ZWSPACE "|" UTF8_IDEOSPACE ")+$");
  
      clipEnd (val, re1, re2);
  }
  
  void  clipNLEnd (ustring& val) {
-    static uregex  re1 ("^\\n+");
-    static uregex  re2 ("\\n+$");
+    uregex  re1 ("^\\n+");
+    uregex  re2 ("\\n+$");
  
      clipEnd (val, re1, re2);
  }
diff --git a/lib/utf8.h b/lib/utf8.h

index 828edd8..ff9a106 100644 (file)
--- a/lib/utf8.h
+++ b/lib/utf8.h
@@ -2,6 +2,7 @@
  #define UTF8_H
  
  #include "ustring.h"
+#include "util_regex.h"
  
  #define  UTF8_SPACE            "\x20"
  #define  UTF8_NBSPACE          "\xc2\xa0"
diff --git a/lib/util_apache.cc b/lib/util_apache.cc

index 34ac371..df338b6 100644 (file)
--- a/lib/util_apache.cc
+++ b/lib/util_apache.cc
@@ -11,7 +11,7 @@ ustring  apacheAbsolutePath (const ustring& url) {
      ustring  ans;
      std::vector<ustring>  ary;
      std::vector<ustring>::iterator  it;
-    Splitter  sp (url.begin (), url.end (), re_slash);
+    SplitterCh  sp (url, '/');
      uiterator  b, e;
      bool  fdirpath;
      size_t  len = url.length ();
@@ -24,10 +24,6 @@ ustring  apacheAbsolutePath (const ustring& url) {
      if (isAbsolutePath (url)) {
         sp.next ();
      } else {
-/*     ustring  e = getenvString (kSCRIPT_NAME);
-       ustring  p = getenvString (kPATH_INFO);
-       if (p.length () > 0) e.append (p);
-*/
         // mod_rewriteで書き換えた時、元のURLを返す
         ustring  e = getenvString (kREQUEST_URI);
         // REQUEST_URIはデコードされていない。
@@ -35,7 +31,7 @@ ustring  apacheAbsolutePath (const ustring& url) {
         if (p != ustring::npos)
             e.resize (p);
         e = percentDecode (e);  // ディレクトリパスをチェック前にデコードする。
-       splitE (e.begin (), e.end (), re_slash, ary);
+       splitE (e.begin (), e.end (), '/', ary);
         if (ary.size () > 0 && ary.back ().length () > 0) {
             ary.pop_back();
         }
diff --git a/lib/util_check.cc b/lib/util_check.cc

index a8b8b89..b8b5dd3 100644 (file)
--- a/lib/util_check.cc
+++ b/lib/util_check.cc
@@ -4,6 +4,7 @@
  #include "httpconst.h"
  #include "motorconst.h"
  #include "ustring.h"
+#include <ctype.h>
  
  bool  checkRe (const ustring& name, const uregex& re) {
      umatch  m;
@@ -15,7 +16,7 @@ bool  checkRe (const ustring& name, const uregex& re) {
      }
  }
  
-bool  checkRe (const uiterator& b, const uiterator& e, const uregex& re) {
+bool  checkRe (uiterator b, uiterator e, const uregex& re) {
      umatch  m;
  
      if (usearch (b, e, m, re)) {
@@ -25,56 +26,58 @@ bool  checkRe (const uiterator& b, const uiterator& e, const uregex& re) {
      }
  }
  
-bool  checkName (const ustring& name) {
+bool  matchName (const ustring& name) {
      static uregex  re ("^" kWNAME "{0,31}$");
  
      return (checkRe (name, re));
  }
  
-bool  checkFilename (const ustring& name) {
+bool  matchFilename (const ustring& name) {
      static uregex  re ("^" kWNAME "{1,127}(\\." kWORD "{1,16})?$");
  
      return (checkRe (name, re));
  }
  
-bool  checkResourceName (const ustring& name) {
+bool  matchResourceName (const ustring& name) {
  //    static uregex  re ("^(" kWNAME "{0,127}/)*" kWNAME "{0,127}(\\." kWORD "{1,16})?$");
      static uregex  re ("^([a-zA-Z0-9_][a-zA-Z0-9_.-]{0,127}/)*[a-zA-Z0-9_][a-zA-Z0-9_.-]{0,127}(\\." kWORD "{1,16})?$");
  
      return (checkRe (name, re));
  }
  
-bool  checkAbsoluteResourceName (const ustring& name) {
+bool  matchAbsoluteResourceName (const ustring& name) {
      static uregex  re ("^/(" kFName "/)*" kFName "$");
  
      return (checkRe (name, re));
  }
  
-bool  checkASCII (const ustring& name) {
-    static uregex  re ("[^ -\\x7e]");
+static bool  isPrintableAscii (int c) {        // predicate handed to matchWordFn by matchASCII
+    return 0x20 <= c && c <= 0x7e;     // ' ' .. '~'
+}
  
-    return (! checkRe (name, re));
+bool  matchASCII (uiterator b, uiterator e) {  // [ -\x7e]
+    return matchWordFn (b, e, isPrintableAscii);
  }
  
-bool  checkIP (const ustring& name) {
+bool  matchIP (const ustring& name) {
      static uregex  re ("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$");
  
      return (checkRe (name, re));
  }
  
-bool  checkDomain_dot (const ustring& name) {
+bool  matchDomain_dot (const ustring& name) {
      static uregex  re ("^\\.?([a-zA-Z0-9-]+\\.)*([a-zA-Z0-9-]+)$");
  
      return (checkRe (name, re));
  }
  
-bool  checkHostname (const ustring& name) {
+bool  matchHostname (const ustring& name) {
      static uregex  re ("^[a-zA-Z0-9][a-zA-Z0-9\\-]*(\\.[a-zA-Z0-9][a-zA-Z0-9\\-]*)*$");
  
      return (checkRe (name, re));
  }
  
-bool  checkMimeType (const ustring& name) {
+bool  matchMimeType (const ustring& name) {
      static  uregex re ("^[a-z_0-9-]+/[a-z_0-9.+*-]+$");
  
      return (checkRe (name, re));
@@ -86,47 +89,43 @@ bool  checkMailAddr (const ustring& name) {
      return (checkRe (name, re));
  }
  
-bool  checkAlNum (const uiterator& b, const uiterator& e) {
-    static  uregex re ("^[a-zA-Z_0-9]+$");
-
-    return (checkRe (b, e, re));
-}
-
-bool  checkNum (const ustring& text) {
-    return (checkNum (text.begin (), text.end ()));
-}
-
-bool  checkNum (const uiterator& b, const uiterator& e) {
-    return (checkRe (b, e, re_digits));
+bool  matchAlNum (uiterator b, uiterator e) {  // checks [b, e) against table_alnum via matchWordTbl
+    static char  table_alnum[] = {     // [a-zA-Z_0-9]; 128 entries, presumably indexed by character code -- confirm in matchWordTbl
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x0f
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1f
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2f
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30-0x3f: '0'-'9'
+       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x4f: 'A'-'O'
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50-0x5f: 'P'-'Z', '_'
+       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x6f: 'a'-'o'
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x70-0x7f: 'p'-'z'
+    };
+    return matchWordTbl (b, e, table_alnum);
+}
+
+bool  matchNum (uiterator b, uiterator e) {    // true iff [b, e) is one or more ASCII digits
+    if (! (b < e))             // empty range: "[0-9]+" needs at least one digit
+       return false;
+    for (; b < e; ++ b)
+       if (*b < '0' || '9' < *b) return false; // range test; isdigit() on a negative char is undefined
+    return true;
  }
  
-bool  checkWidth (const uiterator& b, const uiterator& e) {
+bool  matchWidth (uiterator b, uiterator e) {
      static uregex  re_width ("^[0-9]+(\\.[0-9]+)?(%|px|pt|in|mm|cm|em|ex)?$");
      return (checkRe (b, e, re_width));
  }
  
-bool  checkWidth (const ustring& text) {
-    return (checkWidth (text.begin (), text.end ()));
-}
-
-bool  checkColor (const uiterator& b, const uiterator& e) {
+bool  checkColor (uiterator b, uiterator e) {
      static uregex  re_color ("^#([0-9a-fA-F]{3}){1,2}$");
      return (checkRe (b, e, re_color));
  }
  
-bool  checkColor (const ustring& text) {
-    return (checkColor (text.begin (), text.end ()));
-}
-
-bool  checkWikiID (const uiterator& b, const uiterator& e) {
+bool  matchWikiID (uiterator b, uiterator e) {
      static uregex  re_wikiid ("^" rWikiID "$");
      return (checkRe (b, e, re_wikiid));
  }
  
-bool  checkWikiID (const ustring& text) {
-    return (checkWikiID (text.begin (), text.end ()));
-}
-
  bool  checkAry (const ustring& name) {
      return (name.length () > 0 && name[0] == '@');
  }
diff --git a/lib/util_check.h b/lib/util_check.h

index ece9385..3918cdb 100644 (file)
--- a/lib/util_check.h
+++ b/lib/util_check.h
@@ -2,6 +2,7 @@
  #define UTIL_CHECK_H
  
  #include "ustring.h"
+#include "util_regex.h"
  
  #define  UA_Windows    0x00000001
  #define  UA_Mac                0x00000002
@@ -13,26 +14,34 @@
  #define  UA_NetFront   0x00001000
  
  bool  checkRe (const ustring& name, const uregex& re);
-bool  checkRe (const uiterator& b, const uiterator& e, const uregex& re);
-bool  checkName (const ustring& name);
-bool  checkFilename (const ustring& name);
-bool  checkResourceName (const ustring& name);
-bool  checkAbsoluteResourceName (const ustring& name);
-bool  checkASCII (const ustring& name);
-bool  checkIP (const ustring& name);
-bool  checkDomain_dot (const ustring& name);
-bool  checkHostname (const ustring& name);
-bool  checkMimeType (const ustring& name);
+bool  checkRe (uiterator b, uiterator e, const uregex& re);
+bool  matchName (const ustring& name);
+bool  matchFilename (const ustring& name);
+bool  matchResourceName (const ustring& name);
+bool  matchAbsoluteResourceName (const ustring& name);
+bool  matchASCII (uiterator b, uiterator e);
+bool  matchIP (const ustring& name);
+bool  matchDomain_dot (const ustring& name);
+bool  matchHostname (const ustring& name);
+bool  matchMimeType (const ustring& name);
  bool  checkMailAddr (const ustring& name);
-bool  checkAlNum (const uiterator& b, const uiterator& e);
-bool  checkNum (const ustring& text);                  // [0-9]+
-bool  checkNum (const uiterator& b, const uiterator& e);
-bool  checkWidth (const uiterator& b, const uiterator& e);
-bool  checkWidth (const ustring& text);
-bool  checkColor (uiterator& b, uiterator& e);
-bool  checkColor (const ustring& text);
-bool  checkWikiID (const uiterator& b, const uiterator& e);
-bool  checkWikiID (const ustring& text);
+bool  matchAlNum (uiterator b, uiterator e);
+bool  matchNum (uiterator b, uiterator e);                     // [0-9]+
+inline bool  matchNum (const ustring& text) {
+    return matchNum (text.begin (), text.end ());
+}
+bool  matchWidth (uiterator b, uiterator e);
+inline bool  matchWidth (const ustring& text) {
+    return matchWidth (text.begin (), text.end ());
+}
+bool  checkColor (uiterator b, uiterator e);
+inline bool  checkColor (const ustring& text) {
+    return checkColor (text.begin (), text.end ());
+}
+bool  matchWikiID (uiterator b, uiterator e);
+inline bool  matchWikiID (const ustring& text) {
+    return matchWikiID (text.begin (), text.end ());
+}
  bool  checkAry (const ustring& name);
  bool  checkAry (const ustring& name, ustring& sym);
  bool  isHTTPS ();
diff --git a/lib/util_const.cc b/lib/util_const.cc

index c116663..353a88c 100644 (file)
--- a/lib/util_const.cc
+++ b/lib/util_const.cc
@@ -83,20 +83,6 @@ const char*  WStr[] = {
      "Thursday", "Friday", "Saturday"
  };
  
-uregex  re_tab ("\t");
-uregex  re_lf ("\\n");
-uregex  re_nl ("\\r\\n?|\\n");
-uregex  re_slash ("/");
-uregex  re_colon (":");
-uregex  re_comma (",");
-//uregex  re_amp ("&");
-//uregex  re_eq ("=");
-uregex  re_digits ("^[0-9]+$");
-uregex  re_realnumber ("^[+-]?[0-9]+(\\.[0-9]*)?([eE][+-]?[0-9]+)?");
-uregex  re_symbol ("^[^\\x00- \"'();\\[\\]\\{\\}]+");
-uregex  re_nonsymbolchar ("[\\x00- \"'();\\[\\]\\{\\}\\\\]");  // \を含める
-uregex  re_q ("\"");
-
  MNode*  mlTrue = NULL;
  static MNodePtr  trueHolder;
  
diff --git a/lib/util_const.h b/lib/util_const.h

index 2ad1762..2d79912 100644 (file)
--- a/lib/util_const.h
+++ b/lib/util_const.h
@@ -72,19 +72,6 @@ extern ustring  uHttps;
  extern ustring  uDefault;
  extern ustring  uAssoc;
  
-extern uregex  re_tab;
-extern uregex  re_lf;
-extern uregex  re_nl;
-extern uregex  re_slash;
-extern uregex  re_colon;
-extern uregex  re_comma;
-//extern uregex  re_amp;
-//extern uregex  re_eq;
-extern uregex  re_digits;
-extern uregex  re_realnumber;
-extern uregex  re_symbol;
-extern uregex  re_nonsymbolchar;
-extern uregex  re_q;
  extern MNode*  mlTrue;
  
  #endif /* UTIL_CONST_H */
diff --git a/lib/util_file.cc b/lib/util_file.cc

index 0b337e4..6c95440 100644 (file)
--- a/lib/util_file.cc
+++ b/lib/util_file.cc
@@ -2,6 +2,7 @@
  #include "config.h"
  #include "ustring.h"
  #include "util_const.h"
+#include "util_splitter.h"
  #include "filemacro.h"
  #include <iostream>
  #include <sys/types.h>
@@ -84,21 +85,10 @@ void  writeFile (const ustring& filename, ustring& data) {
    top must end with slash.
  */
  void  makeSubdir (ustring& top, const ustring& sub) {
-    uiterator  b, e;
-    umatch  m;
+    SplitterCh  sp (sub, '/');
  
-    b = sub.begin ();
-    e = sub.end ();
-    while (b != e && usearch (b, e, m, re_slash)) {
-       top.append (b, m[0].second);
-       mkdir (top.c_str (), 0777);
-#ifdef DEBUG2
-       std::cerr << "mkdir:" << top << "\n";
-#endif /* DEBUG */
-       b = m[0].second;
-    }
-    if (b != e) {
-       top.append (b, e);
+    while (sp.next ()) {
+       top.append (sp.b, sp.u);
         mkdir (top.c_str (), 0777);
  #ifdef DEBUG2
         std::cerr << "mkdir:" << top << "\n";
diff --git a/lib/util_random.cc b/lib/util_random.cc

index 0c177fc..7c4db3b 100644 (file)
--- a/lib/util_random.cc
+++ b/lib/util_random.cc
@@ -6,6 +6,7 @@
  #include <stdio.h>
  #include <sys/types.h>
  #include <sys/uio.h>
+#include <assert.h>
  
  static int  Inited = 0;
  static unsigned long  Seed;
diff --git a/lib/util_regex.h b/lib/util_regex.h

new file mode 100644 (file)

index 0000000..31a747e
--- /dev/null
+++ b/lib/util_regex.h
@@ -0,0 +1,17 @@
+#ifndef UTIL_REGEXP_H  // include guard (macro is spelled REGEXP although the file is util_regex.h)
+#define UTIL_REGEXP_H
+
+#include "ustring.h"
+#include <boost/regex.hpp>
+
+typedef boost::match_results<ustring::const_iterator>  umatch; // match result over ustring iterators
+typedef boost::basic_regex<char, boost::regex_traits<char> >  uregex;  // regex over char
+
+inline bool  usearch (ustring::const_iterator first, ustring::const_iterator last, umatch& m, const uregex& re, boost::match_flag_type flags = boost::regex_constants::match_single_line) {
+    return regex_search (first, last, m, re, flags);   // search [first, last) for re; m receives the match
+}
+inline bool  usearch (const ustring& s, umatch& m, const uregex& re, boost::match_flag_type flags = boost::regex_constants::match_single_line) {
+    return regex_search (s.begin (), s.end (), m, re, flags);  // whole-string form of the overload above
+}
+
+#endif /* UTIL_REGEXP_H */
diff --git a/lib/util_splitter.h b/lib/util_splitter.h

index 59686b6..9cb6aaa 100644 (file)
--- a/lib/util_splitter.h
+++ b/lib/util_splitter.h
@@ -2,37 +2,76 @@
  #define UTIL_SPLITTER_H
  
  #include "ustring.h"
+#include "util_string.h"
  
  class  Splitter {
   public:
-    uregex*  re;
-    uiterator  b;
-    uiterator  t;
-    uiterator  u;
-    uiterator  e;
-    umatch  m;
+    uiterator  b;              // start of the current piece
+    uiterator  t;              // start of the separator (end of the current piece)
+    uiterator  u;              // end of the separator
+    uiterator  e;              // end of the input
  
-    Splitter (const ustring& text, uregex& r) {
-       b = t = u = text.begin ();
-       e = text.end ();
-       re = &r;
-    };
-    Splitter (uiterator pb, uiterator pe, uregex& r) {
-       b = t = u = pb;
-       e = pe;
-       re = &r;
+    Splitter (uiterator _begin, uiterator _end) {      // abstract base: subclasses supply next()/nextSep()
+       b = t = u = _begin;     // all cursors start at the head of the input
+       e = _end;
      };
      virtual  ~Splitter () {};
-    virtual void  init (uiterator pb, uiterator pe) {
-       b = t = u = pb;
-       e = pe;
-    };
+
      virtual bool  isEnd () {
         return b == e;
      };
+    virtual uiterator  begin () {      // start of the current piece
+       return b;
+    };
+    virtual uiterator  end () {        // end of the current piece (== start of the separator)
+       return t;
+    };
+    virtual ustring  pre () {          // copy of the current piece [b, t)
+       return ustring (b, t);
+    };
+    virtual size_t  preSize () {       // length of the current piece
+       return t - b;
+    };
+    virtual uiterator  matchBegin () { // start of the separator
+       return t;
+    };
+    virtual uiterator  matchEnd () {   // end of the separator
+       return u;
+    };
+    virtual uiterator  eol () {        // end of the input
+       return e;
+    };
+    virtual void  rewind (int i) {     // move u back by i, but never before t
+       int  n = u - t;         // current separator length
+       if (n > i) {
+           u -= i;
+       } else {
+           u -= n;             // clamp: u becomes t
+       }
+    };
+    virtual void  shiftCursor () {     // move the piece start to the end of the separator
+       b = u;
+    };
+    virtual bool  next () = 0;         // advance to the next piece
+    virtual bool  nextSep () = 0;      // advance to the next separator
+};
+
+class  SplitterRe: public Splitter {
+ public:
+    uregex*  re;
+    umatch  m;
+
+    SplitterRe (const ustring& text, uregex& _re): Splitter (text.begin (), text.end ()) {
+       re = &_re;
+    };
+    SplitterRe (uiterator _begin, uiterator _end, uregex& _re): Splitter (_begin, _end) {
+       re = &_re;
+    };
+    virtual  ~SplitterRe () {};
+
      virtual bool  next () {
         b = u;
-       if (b != e) {
+       if (b < e) {
             if (usearch (b, e, m, *re)) {
                 t = m[0].first;
                 u = m[0].second;
@@ -47,14 +86,13 @@ class  Splitter {
      };
      virtual bool  nextSep () {
         b = u;
-       if (b != e) {
+       if (b < e) {
             if (usearch (b, e, m, *re)) {
                 t = m[0].first;
                 u = m[0].second;
                 return true;
             } else {
-               t = e;
-               u = e;
+               t = u = e;
                 return false;
             }
         } else {
@@ -62,41 +100,21 @@ class  Splitter {
             return false;
         }
      };
-    virtual uiterator  begin () {
-       return b;
-    };
-    virtual uiterator  end () {
-       return t;
-    };
-    virtual ustring  cur () {
-       return ustring (b, t);
-    };
      virtual bool  match (int index) {
         return (t != u && m[index].matched);
      }
      virtual uiterator  matchBegin () {
         return t;
      };
-    virtual uiterator  matchBegin (int index) {
-       return m[index].first;
-    };
      virtual uiterator  matchEnd () {
         return u;
      };
+    virtual uiterator  matchBegin (int index) {
+       return m[index].first;
+    };
      virtual uiterator  matchEnd (int index) {
         return m[index].second;
      };
-    virtual uiterator  eol () {
-       return e;
-    };
-    virtual void  rewind (int i) {
-       int  n = u - t;
-       if (n > i) {
-           u -= i;
-       } else {
-           u -= n;
-       }
-    };
      virtual bool  nextSearch () {
         if (u != e) {
             if (usearch (u, e, m, *re)) {
@@ -114,8 +132,147 @@ class  Splitter {
             return false;
         }
      };
-    virtual void  shiftCursor () {
-       b = u;
+};
+
+class  SplitterCh: public Splitter {   // Splitter whose separator is the single character ch
+ public:
+    int  ch;                   // separator character
+
+    SplitterCh (uiterator _begin, uiterator _end, int _ch): Splitter (_begin, _end) {
+       ch = _ch;
+    };
+    SplitterCh (const ustring& text, int _ch): Splitter (text.begin (), text.end ()) {
+       ch = _ch;
+    };
+    virtual  ~SplitterCh () {};
+
+    virtual bool  next () {            // true while input remains at the cursor; false once b reaches e
+       b = t = u;              // resume right after the previous separator
+       if (b < e) {
+           if (findChar (t, e, ch)) {  // presumably advances t to the first ch in [t, e) -- confirm in util_string
+               u = t + 1;      // separator is one character wide
+           } else {
+               u = e;          // no separator found: the next call will see b == e
+           }
+           return true;
+       } else {
+           return false;
+       }
+    };
+    virtual bool  nextSep () {         // true only when a separator was found
+       b = t = u;
+       if (b < e) {
+           if (findChar (t, e, ch)) {
+               u = t + 1;
+               return true;
+           } else {
+               u = e;
+               return false;
+           }
+       } else {
+           t = u = e;
+           return false;
+       }
+    };
+};
+
+class  SplitterChars: public Splitter {        // Splitter driven by findChars() with a pattern string
+ public:
+    ustring  pattern;          // passed to findChars(); presumably the set of separator characters -- confirm
+
+    SplitterChars (uiterator _begin, uiterator _end, const ustring& _pat): Splitter (_begin, _end) {
+       pattern = _pat;         // stored by value
+    };
+    SplitterChars (const ustring& text, const ustring& _pat): Splitter (text.begin (), text.end ()) {
+       pattern = _pat;
+    };
+    virtual  ~SplitterChars () {};
+
+    virtual bool  next () {            // true while input remains at the cursor; false once b reaches e
+       b = t = u;              // resume right after the previous separator
+       if (b < e) {
+           if (findChars (t, e, pattern)) {    // presumably advances t to the match -- confirm in util_string
+               u = t + 1;      // separator is one character wide
+           } else {
+               u = e;          // no separator found: the next call will see b == e
+           }
+           return true;
+       } else {
+           return false;
+       }
+    };
+    virtual bool  nextSep () {         // true only when a separator was found
+       b = t = u;
+       if (b < e) {
+           if (findChars (t, e, pattern)) {
+               u = t + 1;
+               return true;
+           } else {
+               u = e;
+               return false;
+           }
+       } else {
+           t = u = e;
+           return false;
+       }
+    };
+};
+
+class  SplitterFn: public Splitter {   // Splitter whose separator search is a caller-supplied function
+ public:
+    bool  (*fn) (uiterator&, uiterator, uiterator&);   // called as fn (t, e, u); presumably sets t/u to the separator bounds -- confirm per callback
+
+    SplitterFn (uiterator _begin, uiterator _end, bool (*_fn)(uiterator&, uiterator, uiterator&)): Splitter (_begin, _end) {
+       fn = _fn;
+    };
+    SplitterFn (const ustring& text, bool (*_fn)(uiterator&, uiterator, uiterator&)): Splitter (text.begin (), text.end ()) {
+       fn = _fn;
+    };
+    virtual  ~SplitterFn () {};
+
+    virtual bool  next () {            // true while input remains at the cursor
+       b = t = u;              // resume right after the previous separator
+       if (b < e) {
+           fn (t, e, u);       // result ignored: a piece is reported whether or not a separator was found
+           return true;
+       } else {
+           return false;
+       }
+    };
+    virtual bool  nextSep () {         // returns whatever fn reports
+       b = t = u;
+       if (b < e) {
+           return fn (t, e, u);
+       } else {
+           t = u = e;
+           return false;
+       }
+    };
+};
+
+class  SplitterNL: public Splitter {   // Splitter that delegates the separator search to findNL()
+ public:
+    SplitterNL (uiterator _begin, uiterator _end): Splitter (_begin, _end) {};
+    SplitterNL (const ustring& text): Splitter (text.begin (), text.end ()) {};
+    virtual  ~SplitterNL () {};
+
+    virtual bool  next () {            // true while input remains at the cursor
+       b = t = u;              // resume right after the previous separator
+       if (b < e) {
+           findNL (t, e, u);   // presumably sets t/u to the newline bounds -- confirm in util_string
+           return true;
+       } else {
+           return false;
+       }
+    };
+    virtual bool  nextSep () {         // returns whatever findNL reports
+       b = t = u;
+       if (b < e) {
+           return findNL (t, e, u);
+       } else {
+           t = u = e;
+           return false;
+       }
      };
  };
  
diff --git a/lib/util_string.cc b/lib/util_string.cc

index 948bd2c..cebdd9b 100644 (file)
--- a/lib/util_string.cc
+++ b/lib/util_string.cc
@@ -62,11 +62,13 @@ ustring  UIConv::cv (const ustring& text) {
      return ans;
  }
  
+// True when c is one of the ASCII decimal digits '0'..'9'.
+static bool  isDigit (int c) {
+    return c >= '0' && c <= '9';
+}
+
  ustring  c3 (const ustring& str) {
      bool  qsign = false;
-    static uregex  re ("^[0-9]+");
-    uiterator  b, e;
-    umatch  m;
+    uiterator  b, e, t;
  
      b = str.begin ();
      e = str.end ();
@@ -74,16 +76,16 @@ ustring  c3 (const ustring& str) {
         qsign = true;
         b = b + 1;
      }
-    if (usearch (b, e, m, re)) {
-       int  n = m[0].second - m[0].first;
+    t = b;
+    if (matchHeadFn (t, e, isDigit)) {
+       int  n = t - b;
         int  l = str.size () + n / 3;
         ustring  ans;
-
         ans.reserve (l);
         if (qsign) {
             ans.append (1, str[0]);
         }
-       for (; b != m[0].second; b ++) {
+       for (; b < t; ++ b) {
             ans.append (1, *b);
             if (n > 1 && n % 3 == 1) {
                 ans.append (CharConst (","));
@@ -184,124 +186,160 @@ ustring  urldecode_nonul (const ustring& str) {
      return ans;
  }
  
-ustring  omitPattern (const ustring& text, uregex& re) {
-    Splitter  sp (text, re);
-
-    if (sp.next ()) {
-       if (sp.match (0)) {
-           ustring  ans;
-           ans.reserve (text.length ());
-           if (sp.begin () != sp.end ())
-               ans.append (sp.begin (), sp.end ());
-           while (sp.next ()) {
-               if (sp.begin () != sp.end ())
-                   ans.append (sp.begin (), sp.end ());
-           }
-           return ans;
-       } else {
-           return text;
-       }
-    } else {
+static ustring  omitPattern (const ustring& text, int (*fn)(int)) {
+    uiterator  b = text.begin ();
+    uiterator  e = text.end ();
+    uiterator  p = b;
+    for (; p < e; ++ p) {
+       if (fn (*p))
+           break;
+    }
+    if (p == e) {
         return text;
+    } else {
+       ustring  ans;
+       ans.reserve (text.length ());
+       ans.assign (b, p);
+       ++ p;
+       for (; p < e; ++ p) {
+           if (! fn (*p))
+               ans.append (1, *p);
+       }
+       return ans;
      }
  }
  
  ustring  omitCtrl (const ustring& str) {
-    static uregex  re ("[\\x00-\\x1f\\x7f]+");
-    return omitPattern (str, re);
+    return omitPattern (str, iscntrl);
+}
+
+// Predicate for omitCtrlX (): non-zero for the ASCII control codes except
+// TAB (0x09) and LF (0x0a), and for DEL (0x7f).  Values outside 0..127
+// are never flagged.
+static int  iscntrlx (int c) {
+    static char  table_ctrlx[] = {
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
+    };
+    if (c < 0 || c >= 128)
+       return 0;
+    return table_ctrlx[c];
  }
  
  ustring  omitCtrlX (const ustring& str) {
-    static uregex  re ("[^\\x09\\x0a\\x20-\\x7e\\x80-\\xff]+");
-    return omitPattern (str, re);
+    return omitPattern (str, iscntrlx);
+}
+
+// Predicate for omitNul (): non-zero only for the NUL byte.
+static int  isNUL (int c) {
+    return c == '\0';
  }
  
  ustring  omitNul (const ustring& str) {
-    static uregex  re ("[\\x00]+");
-    return omitPattern (str, re);
+    return omitPattern (str, isNUL);
+}
+
+// Predicate for omitNL (): non-zero for LF (0x0a) and CR (0x0d).
+static int  iscrlfchar (int c) {
+    return c == '\n' || c == '\r';
  }
  
  ustring  omitNL (const ustring& str) {
-    return omitPattern (str, re_nl);
+    return omitPattern (str, iscrlfchar);
+}
+
+// Predicate for omitNonAscii (): non-zero for anything outside 0x20..0x7e.
+static int  isnonasciichar (int c) {
+    return ! (0x20 <= c && c <= 0x7e);
  }
  
  ustring  omitNonAscii (const ustring& str) {
-    static uregex  re ("[^ -\\x7e]+");
-    return omitPattern (str, re);
+    return omitPattern (str, isnonasciichar);
+}
+
+// Predicate for omitNonAsciiWord (): non-zero for anything outside
+// 0x21..0x7e, so the space character is flagged as well.
+static int  isnonasciiword (int c) {
+    return ! (0x21 <= c && c <= 0x7e);
  }
  
  ustring  omitNonAsciiWord (const ustring& str) {
-    static uregex  re ("[^\\x21-\\x7e]+");
-    return omitPattern (str, re);
+    return omitPattern (str, isnonasciiword);
  }
  
-static ustring  percentEncode (uiterator b, uiterator e, const uregex& re) {
-    // $1 -> _
-    // $2 -> %HEX
-    umatch  m;
+static ustring  percentEncode (Splitter& sp) {
      ustring  ans;
-
-    while (b < e && usearch (b, e, m, re)) {
-       if (b < m[0].first)
-           ans.append (b, m[0].first);
-       if (m[1].matched) {
+    int  c;
+    while (sp.nextSep ()) {
+       if (sp.preSize () > 0)
+           ans.append (sp.pre ());
+       c = *sp.matchBegin ();
+       if (c == '\0') {
             ans.append (uUScore);
-       } else if (m[2].matched) {
-           ans.append (percentHEX (*m[2].first));
         } else {
-           assert (0);
+           ans.append (percentHEX (c));
         }
-       b = m[0].second;
      }
-    if (b < e)
-       ans.append (b, e);
-
+    if (sp.preSize () > 0)
+       ans.append (sp.pre ());
      return ans;
  }
  
-ustring  percentEncode (uiterator b, uiterator e) {
-    static uregex  re ("(\\x00)|([^A-Za-z0-9_.~-])");
-
-    return percentEncode (b, e, re);
-}
-
-ustring  percentEncode_path (uiterator b, uiterator e) {
-    static uregex  re ("(\\x00)|([^A-Za-z0-9_/.~-])");
-
-    return percentEncode (b, e, re);
-}
-
-ustring  percentEncode (const ustring& str) {
-    return percentEncode (str.begin (), str.end ());
-}
-
-ustring  percentEncode_path (const ustring& str) {
-    return percentEncode_path (str.begin (), str.end ());
+// Find the next byte in [b, e) that percentEncode () has to rewrite.
+// On a hit b is left on that byte, u is set to the byte after it and true
+// is returned.  Otherwise the scan ends with b no longer below e, u is set
+// to e and false is returned.  Byte values outside 0..127 always count as
+// a hit.
+static bool  findPercentChar (uiterator& b, uiterator e, uiterator& u) {
+    static char  table_percentchar[] = {               // (\x00)|([^A-Za-z0-9_.~\-])
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 
+       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
+       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 
+    };
+    while (b < e) {
+       const int  c = *b;
+       if (c < 0 || c >= 128 || table_percentchar[c]) {
+           u = b + 1;
+           return true;
+       }
+       ++ b;
+    }
+    u = e;
+    return false;
  }
  
-#if 0
-ustring  percentEncode_path (uiterator b, uiterator e) {
-    uiterator  i;
-    ustring  ans;
-
-    for (i = b; i < e; i ++) {
-       if (*i == '/') {
-           if (b < i)
-               ans.append (percentEncode (b, i));
-           ans.append (uSlash);
-           b = i + 1;
+// Percent-encode the range [b, e).  findPercentChar () selects the bytes to
+// rewrite and the Splitter-based percentEncode () overload builds the text.
+ustring  percentEncode (uiterator b, uiterator e) {
+    // stands in for the former regex "(\\x00)|([^A-Za-z0-9_.~-])"
+    SplitterFn  scanner (b, e, findPercentChar);
+    ustring  encoded = percentEncode (scanner);
+    return encoded;
+}
+
+static bool  findPercentPathChar (uiterator& b, uiterator e, uiterator& u) {
+    static char  table_percentpathchar[] = {           // (\x00)|([^A-Za-z0-9_\/.~\-])
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 
+       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 
+       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 
+    };
+    int  c;
+    for (; b < e; ++ b) {
+       c = *b;
+       if (c < 0 || c >= 128 || table_percentpathchar[c]) {
+           u = b + 1;
+           return true;
         }
      }
-    if (b < e)
-       ans.append (percentEncode (b, e));
-
-    return ans;
+    u = e;
+    return false;
  }
  
-ustring  percentEncode_path (const ustring& str) {
-    return percentEncode_path (str.begin (), str.end ());
+// Percent-encode the range [b, e) using findPercentPathChar () to select the
+// bytes to rewrite; its table leaves '/' untouched in addition to the bytes
+// that findPercentChar () leaves untouched.
+ustring  percentEncode_path (uiterator b, uiterator e) {
+    // stands in for the former regex "(\\x00)|([^A-Za-z0-9_/.~-])"
+    SplitterFn  scanner (b, e, findPercentPathChar);
+    ustring  encoded = percentEncode (scanner);
+    return encoded;
  }
-#endif
  
  ustring  percentDecode (const ustring& str) {
      ustring  ans;
@@ -330,10 +368,33 @@ ustring  percentDecode (const ustring& str) {
      return fixUTF8 (ans);
  }
  
-ustring  cookieencode (const ustring& text) {
-    static uregex  re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])");
+// Find the next byte in [b, e) that cookieencode () has to rewrite: the
+// ASCII control codes, DEL, space, ',', ';', '%' and every byte value
+// outside 0..127.  On a hit b is left on that byte, u is set to the byte
+// after it and true is returned.  Otherwise the scan ends with b no longer
+// below e, u is set to e and false is returned.
+static bool  findCookieEncChar (uiterator& b, uiterator e, uiterator& u) {
+    static char  table_cookieencode[] = {              // ([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
+    };
+    while (b < e) {
+       const int  c = *b;
+       if (c < 0 || c >= 128 || table_cookieencode[c]) {
+           u = b + 1;
+           return true;
+       }
+       ++ b;
+    }
+    u = e;
+    return false;
+}
  
-    return percentEncode (text.begin (), text.end (), re);
+// Encode text for use as a cookie value.
+// The bytes to rewrite are chosen by findCookieEncChar () and fall into the
+// two groups of the pattern this function was written against:
+//   ([\x00-\x1f\x7f])    control characters             -> uUScore
+//   ([ ,;%\x80-\xff])    space , ; % and non-ASCII bytes -> percentHEX ()
+// The shared percentEncode (Splitter&) helper substitutes uUScore for NUL
+// only, so routing through it would emit the remaining control characters
+// as %HEX escapes.  The substitution is therefore done here to keep the
+// regex-era mapping.
+ustring  cookieencode (const ustring& text) {
+    SplitterFn  sp (text.begin (), text.end (), findCookieEncChar);
+    ustring  ans;
+    int  c;
+
+    while (sp.nextSep ()) {
+       if (sp.preSize () > 0)
+           ans.append (sp.pre ());             // untouched run before the hit
+       c = *sp.matchBegin ();
+       if ((0 <= c && c < 0x20) || c == 0x7f) {
+           ans.append (uUScore);               // first group: control character
+       } else {
+           ans.append (percentHEX (c));        // second group
+       }
+    }
+    if (sp.preSize () > 0)
+       ans.append (sp.pre ());                 // tail after the last hit
+    return ans;
  }
  
  ustring  cookiedecode (const ustring& text) {
@@ -392,21 +453,40 @@ ustring  filePart_osSafe (const ustring& path) {
  }
  
  void  split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
-    Splitter  sp (b, e, re);
+    SplitterRe  sp (b, e, re);
  
      while (sp.next ()) {
-       ans.push_back (sp.cur ());
+       ans.push_back (sp.pre ());
+    }
+}
+
+// Split [b, e) at every occurrence of ch, appending each piece that
+// SplitterCh::next () yields to ans.
+void  split (uiterator b, uiterator e, int ch, std::vector<ustring>& ans) {
+    for (SplitterCh  fields (b, e, ch); fields.next (); ) {
+       ans.push_back (fields.pre ());
      }
  }
  
  void  splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans) {
-    Splitter  sp (b, e, re);
+    SplitterRe  sp (b, e, re);
  
-    if (b != e) {
+    if (b < e) {
         while (sp.nextSep ()) {
-           ans.push_back (sp.cur ());
+           ans.push_back (sp.pre ());
         }
-       ans.push_back (ustring (sp.begin (), sp.eol ()));
+       ans.push_back (sp.pre ());
+    }
+}
+
+// Split [b, e) at every occurrence of ch using SplitterCh::nextSep ().
+// One piece is appended per separator found, plus one more after the last
+// separator.  Nothing is appended when b is not below e.
+void  splitE (uiterator b, uiterator e, int ch, std::vector<ustring>& ans) {
+    SplitterCh  fields (b, e, ch);
+
+    if (! (b < e))
+       return;
+    while (fields.nextSep ()) {
+       ans.push_back (fields.pre ());
+    }
+    ans.push_back (fields.pre ());
  }
  
@@ -634,7 +714,7 @@ ustring  jsEncode (const ustring& str) {
  
  ustring  filenameEncode (const ustring& text) {
      static uregex  re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)");
-    Splitter  sp (text, re);
+    SplitterRe  sp (text, re);
      ustring  ans;
      int  c;
  
@@ -664,7 +744,7 @@ ustring  filenameEncode (const ustring& text) {
  
  ustring  filenameDecode (const ustring& text) {
      static uregex  re (":([0-9a-fA-F][0-9a-fA-F])");
-    Splitter  sp (text, re);
+    SplitterRe  sp (text, re);
      ustring  ans;
      int  c;
  
@@ -810,14 +890,16 @@ uint32_t  hextoul (uiterator b, uiterator e) {
  ustring  toCRLF (const ustring& str) {
      uiterator  b = str.begin ();
      uiterator  e = str.end ();
-    umatch  m;
+    uiterator  p;
      ustring  ans;
  
-    while (usearch (b, e, m, re_lf)) {
-       ans.append (b, m[0].first).append (uCRLF);
-       b = m[0].second;
+    p = b;
+    while (findChar (b, e, '\n')) {
+       ans.append (p, b).append (uCRLF);
+       p = ++ b;
      }
-    ans.append (b, e);
+    if (p < e)
+       ans.append (p, e);
      return ans;
  }
  
@@ -884,18 +966,14 @@ static ustring  colpad0 (int n, const ustring& src) {
   ${W}, ${w}
   ${o}
  */
-//ustring  formatDateString (const ustring& format, time_t tm) {
  ustring  formatDateString (const ustring& format, struct tm& v) {
      ustring  ans;
-//    struct tm  v;
      uiterator  b, e;
      umatch  m;
      int  pc;
-//    static uregex  re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}");
      static uregex  re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}");
      std::vector<ustring>  fpar;
  
-//    localtime_r (&tm, &v);
      b = format.begin ();
      e = format.end ();
      while (usearch (b, e, m, re)) {
@@ -908,14 +986,11 @@ ustring  formatDateString (const ustring& format, struct tm& v) {
                 ans.append (MStr_a[v.tm_mon]);
             }
         } else {
-//         if (m[2].matched) {
             if (m[3].matched) {
-//             pc = strtol (ustring (m[3].first, m[3].second));
                 pc = strtol (ustring (m[4].first, m[4].second));
             } else {
                 pc = 0;
             }
-//         switch (*m[1].first) {
             switch (*m[2].first) {
             case 'Y':
                 ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900)));
@@ -1006,3 +1081,119 @@ ustring  octchar (int c) {
      ans[0] = (c & 0x3) + '0';
      return ans;
  }
+
+// Find the next line break in [b, e).  LF, CR and CR LF each count as one
+// break.  On success b is left on the first byte of the break, u is set
+// just past it and true is returned.  Otherwise the scan ends with b no
+// longer below e, u is set to e and false is returned.
+bool  findNL (uiterator& b, uiterator e, uiterator& u) {
+    while (b < e) {
+       if (*b == '\n' || *b == '\r') {
+           u = b + 1;
+           // a CR directly followed by LF is swallowed as a single break
+           if (*b == '\r' && u < e && *u == '\n')
+               ++ u;
+           return true;
+       }
+       ++ b;
+    }
+    u = e;
+    return false;
+}
+
+// Find the next line break (LF, CR or CR LF) in [b, e).  On success b is
+// left just past the break and true is returned.  Otherwise the scan ends
+// with b no longer below e and false is returned.
+bool  findNLb (uiterator& b, uiterator e) {
+    while (b < e) {
+       const bool  cr = (*b == '\r');
+       if (cr || *b == '\n') {
+           ++ b;
+           // a CR directly followed by LF is swallowed as a single break
+           if (cr && b < e && *b == '\n')
+               ++ b;
+           return true;
+       }
+       ++ b;
+    }
+    return false;
+}
+
+// Advance b through [b, e) until it rests on a byte equal to ch.
+// Returns true with b on that byte, or false once b is no longer below e.
+bool  findChar (uiterator& b, uiterator e, int ch) {
+    while (b < e && *b != ch)
+       ++ b;
+    return b < e;
+}
+
+// Advance b through [b, e) until it rests on a byte that occurs in pattern.
+// Returns true with b on that byte, or false once b is no longer below e.
+bool  findChars (uiterator& b, uiterator e, const ustring& pattern) {
+    while (b < e && pattern.find (*b) == ustring::npos)
+       ++ b;
+    return b < e;
+}
+
+// Advance b through [b, e) until it rests on a byte accepted by fn.
+// Returns true with b on that byte, or false once b is no longer below e.
+bool  findCharFn (uiterator& b, uiterator e, bool (*fn)(int)) {
+    while (b < e && ! fn (*b))
+       ++ b;
+    return b < e;
+}
+
+// Search [b, e) for the separator " *; *": a ';' together with any spaces
+// around it.  Despite the name, the character searched for is a semicolon.
+// On success b is moved to the start of the separator (never back past its
+// value on entry), u is set just past the trailing spaces and true is
+// returned.  On failure b is left where findChar () stopped, u is set to e
+// and false is returned.
+bool  findSepColon (uiterator& b, uiterator e, uiterator& u) {
+    const uiterator  head = b;
+
+    if (! findChar (b, e, ';')) {
+       u = e;
+       return false;
+    }
+    u = b + 1;
+    // widen the match to the left over spaces before the ';'
+    for (; head < b && *(b - 1) == ' '; -- b) {
+    }
+    // and to the right over spaces after it
+    for (; u < e && *u == ' '; ++ u) {
+    }
+    return true;
+}
+
+// Consume the leading run of bytes accepted by fn from [b, e).
+// Returns true when at least one byte was consumed, with b left on the
+// first byte not consumed.  Returns false with b unchanged otherwise.
+bool  matchHeadFn (uiterator& b, uiterator e, bool (*fn)(int)) {
+    const uiterator  start = b;
+
+    while (b < e && fn (*b))
+       ++ b;
+    return b != start;
+}
+
+// True when [b, e) is non-empty and every byte is in 0..127 with a non-zero
+// entry in tbl.  tbl is indexed by byte value.
+bool  matchWordTbl (uiterator b, uiterator e, char* tbl) {
+    if (! (b < e))
+       return false;                   // an empty range never matches
+    for (; b < e; ++ b) {
+       const int  c = *b;
+       if (c < 0 || c >= 128 || ! tbl[c])      // bytes from 128 up never match
+           return false;
+    }
+    return true;
+}
+
+// True when [b, e) is non-empty and every byte is in 0..127 and accepted by
+// fn.  fn is never called with a value outside 0..127.
+bool  matchWordFn (uiterator b, uiterator e, bool (*fn)(int)) {
+    if (! (b < e))
+       return false;                   // an empty range never matches
+    for (; b < e; ++ b) {
+       const int  c = *b;
+       if (c < 0 || c >= 128 || ! fn (c))
+           return false;
+    }
+    return true;
+}
diff --git a/lib/util_string.h b/lib/util_string.h

index 9f7f8e9..e0207b3 100644 (file)
--- a/lib/util_string.h
+++ b/lib/util_string.h
@@ -2,6 +2,7 @@
  #define UTIL_STRING_H
  
  #include "ustring.h"
+#include "util_regex.h"
  #include <time.h>
  #include "iconv_glue.h"
  #include <iconv.h>
@@ -51,7 +52,6 @@ inline uint32_t  to_uint32 (const ustring& v) {
  }
  ustring  percentHEX (int c);
  ustring  urldecode_nonul (const ustring& str);
-ustring  omitPattern (const ustring& text, uregex& re);
  ustring  omitCtrl (const ustring& str);
  ustring  omitCtrlX (const ustring& str);
  ustring  omitNul (const ustring& str);
@@ -59,9 +59,13 @@ ustring  omitNL (const ustring& str);
  ustring  omitNonAscii (const ustring& str);
  ustring  omitNonAsciiWord (const ustring& str);
  ustring  percentEncode (uiterator b, uiterator e);
-ustring  percentEncode (const ustring& str);
+// Convenience overload: percent-encode the whole of str.
+inline ustring  percentEncode (const ustring& str) {
+    uiterator  first = str.begin ();
+    uiterator  last = str.end ();
+    return percentEncode (first, last);
+}
  ustring  percentEncode_path (uiterator b, uiterator e);
-ustring  percentEncode_path (const ustring& str);
+// Convenience overload: apply percentEncode_path () to the whole of str.
+inline ustring  percentEncode_path (const ustring& str) {
+    uiterator  first = str.begin ();
+    uiterator  last = str.end ();
+    return percentEncode_path (first, last);
+}
  ustring  percentDecode (const ustring& str);
  ustring  cookieencode (const ustring& text);
  ustring  cookiedecode (const ustring& text);
@@ -69,7 +73,9 @@ ustring  clipColon (const ustring& text);
  ustring  dirPart (const ustring& path);
  ustring  filePart_osSafe (const ustring& path);
  void  split (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans);
+void  split (uiterator b, uiterator e, int ch, std::vector<ustring>& ans);
  void  splitE (uiterator b, uiterator e, uregex& re, std::vector<ustring>& ans);
+void  splitE (uiterator b, uiterator e, int ch, std::vector<ustring>& ans);
  bool  splitChar (uiterator b, uiterator e, uiterator::value_type ch, uiterator& m1);
  ustring  filenameEncode (const ustring& text);
  ustring  filenameDecode (const ustring& text);
@@ -112,4 +118,14 @@ ustring  hexEncode (const ustring& data);
  int  octchar (uiterator b);
  ustring  octchar (int c);
  
+bool  findNL (uiterator& b, uiterator e, uiterator& u);
+bool  findNLb (uiterator& b, uiterator e);
+bool  findChar (uiterator& b, uiterator e, int ch);
+bool  findChars (uiterator& b, uiterator e, const ustring& pattern);
+bool  findCharFn (uiterator& b, uiterator e, bool (*fn)(int));
+bool  findSepColon (uiterator& b, uiterator e, uiterator& u);
+bool  matchHeadFn (uiterator& b, uiterator e, bool (*fn)(int));
+bool  matchWordTbl (uiterator b, uiterator e, char* tbl);
+bool  matchWordFn (uiterator b, uiterator e, bool (*fn)(int));
+
  #endif /* UTIL_STRING_H */
diff --git a/lib/util_tcp.cc b/lib/util_tcp.cc

index a4e6bd0..4bba0fd 100644 (file)
--- a/lib/util_tcp.cc
+++ b/lib/util_tcp.cc
@@ -570,7 +570,7 @@ ssize_t  SslClient::read2 (void* buf, size_t nbytes) {
  bool  ProxySslClient::connect2 () {
      ustring  msg;
  
-    if (checkHostname (ephost->host)) {
+    if (matchHostname (ephost->host)) {
         msg.assign (CharConst ("CONNECT "));
         msg.append (ephost->host).append (uColon).append (to_ustring (ephost->port)).append (CharConst (" HTTP/1.0" kCRLF));
         msg.append (CharConst ("Host: ")).append (ephost->host).append (uCRLF);
diff --git a/lib/util_tcp.h b/lib/util_tcp.h

index acd1f65..bba5bd0 100644 (file)
--- a/lib/util_tcp.h
+++ b/lib/util_tcp.h
@@ -2,6 +2,7 @@
  #define UTIL_TCP_H
  
  #include "ustring.h"
+#include <assert.h>
  #include <ctype.h>
  #include <sys/types.h>
  #include <netinet/in.h>
diff --git a/modules/ml-addon.cc b/modules/ml-addon.cc

index 33d2b92..d106da8 100644 (file)
--- a/modules/ml-addon.cc
+++ b/modules/ml-addon.cc
@@ -9,6 +9,7 @@
  #include "util_const.h"
  #include "util_check.h"
  #include "util_string.h"
+#include "util_splitter.h"
  #include "util_proc.h"
  #include "ustring.h"
  #include "expr.h"
@@ -67,7 +68,15 @@ public:
             ustring  u;
             while (rest) {
                 u = eval_str (rest->car (), mlenv);
-               u = regex_replace (u, re_nl, uSPC, boost::regex_constants::match_single_line);
+               {
+                   ustring  ans;
+                   SplitterNL  sp (u);
+                   while (sp.nextSep ()) {
+                       ans.append (sp.pre ()).append (uSPC);
+                   }
+                   ans.append (sp.pre ());
+                   u = ans;
+               }
                 par.push_back (u);
                 nextNode (rest);
             }
@@ -104,7 +113,7 @@ public:
         cmd = eval_str (params[0], mlenv);
         type = eval_asciiword (keywords[1], mlenv);
         evkw_bool (2, fContinue);
-       if (! checkMimeType (type))
+       if (! matchMimeType (type))
             type.resize (0);
      };
  };
@@ -116,7 +125,7 @@ static void  addon_sub2 (MlEnv* mlenv, AddonParams& o) {
  
      if (o.cmd.size () == 0)
         throw (uErrorCmdNameEmpty);
-    if (! checkFilename (o.cmd))
+    if (! matchFilename (o.cmd))
         throw (o.cmd + uErrorBadCmd);
  
      {
@@ -173,20 +182,25 @@ MNode*  ml_addon_tab (MNode* cell, MlEnv* mlenv) {
      addon_sub2 (mlenv, o);
      {
         ustring  a;
-       uiterator  b, e;
-       umatch  m;
+       uiterator  b, e, p;
+//     umatch  m;
         MNodeList  ans;
  
         o.proc.read (a);
         a = fixUTF8 (a);
         b = a.begin ();
         e = a.end ();
-       while (usearch (b, e, m, re_tab)) {
-           ans.append (newMNode_str (new ustring (b, m[0].first)));
-           b = m[0].second;
+//     while (usearch (b, e, m, re_tab)) {
+//         ans.append (newMNode_str (new ustring (b, m[0].first)));
+//         b = m[0].second;
+//     }
+       p = b;
+       while (findChar (b, e, '\t')) {
+           ans.append (newMNode_str (new ustring (p, b)));
+           p = ++ b;
         }
-       if (b != e) {
-           ans.append (newMNode_str (new ustring (b, e)));
+       if (p < e) {
+           ans.append (newMNode_str (new ustring (p, e)));
         }
         return ans.release ();
      }
@@ -216,8 +230,8 @@ MNode*  ml_addon_array_tab_nl (MNode* cell, MlEnv* mlenv) {
      addon_sub2 (mlenv, o);
      {
         ustring  a, u;
-       uiterator  b, e;
-       umatch  m, m2;
+       uiterator  b, e, p, p2;
+//     umatch  m, m2;
         size_t  n = 0;
         int  i;
  
@@ -225,36 +239,50 @@ MNode*  ml_addon_array_tab_nl (MNode* cell, MlEnv* mlenv) {
         a = fixUTF8 (a);
         b = a.begin ();
         e = a.end ();
-       while (usearch (b, e, m, re_lf)) {
+       p = b;
+//     while (usearch (b, e, m, re_lf)) {
+       while (findChar (b, e, '\n')) {
             n ++;
             i = 0;
-           while (i < vars.size () && usearch (b, m[0].first, m2, re_tab)) {
-               h = newMNode_str (new ustring (b, m2[0].first));
+//         while (i < vars.size () && usearch (b, m[0].first, m2, re_tab)) {
+//             h = newMNode_str (new ustring (b, m2[0].first));
+           p2 = p;
+           while (i < vars.size () && findChar (p, b, '\t')) {
+               h = newMNode_str (new ustring (p2, p));
                 mlenv->setAry (vars[i], n, h.p);
                 i ++;
-               b = m2[0].second;
+//             b = m2[0].second;
+               p2 = ++ p;
             }
-           if (i < vars.size () && b != m[0].first) {
-               h = newMNode_str (new ustring (b, m[0].first));
+//         if (i < vars.size () && b != m[0].first) {
+//             h = newMNode_str (new ustring (b, m[0].first));
+           if (i < vars.size () && p < b) {
+               h = newMNode_str (new ustring (p, b));
                 mlenv->setAry (vars[i], n, h.p);
                 i ++;
             }
             for (; i < vars.size (); i ++) {
                 mlenv->setAry (vars[i], n, NULL);
             }
-           b = m[0].second;
+//         b = m[0].second;
+           p = ++ b;
         }
-       if (b != e) {
+//     if (b != e) {
+       if (p < e) {
             n ++;
             i = 0;
-           while (i < vars.size () && usearch (b, e, m2, re_tab)) {
-               h = newMNode_str (new ustring (b, m2[0].first));
+//         while (i < vars.size () && usearch (b, e, m2, re_tab)) {
+//             h = newMNode_str (new ustring (b, m2[0].first));
+           p2 = p;
+           while (i < vars.size () && findChar (p, e, '\t')) {
+               h = newMNode_str (new ustring (p2, p));
                 mlenv->setAry (vars[i], n, h.p);
                 i ++;
-               b = m2[0].second;
+//             b = m2[0].second;
+               p2 = ++ p;
             }
-           if (i < vars.size () && b != e) {
-               h = newMNode_str (new ustring (b, e));
+           if (i < vars.size () && p < e) {
+               h = newMNode_str (new ustring (p, e));
                 mlenv->setAry (vars[i], n, h.p);
                 i ++;
             }
diff --git a/modules/ml-apache.cc b/modules/ml-apache.cc

index 9502ab0..abb3f89 100644 (file)
--- a/modules/ml-apache.cc
+++ b/modules/ml-apache.cc
@@ -183,7 +183,7 @@ MNode*  ml_get_http_header (MNode* cell, MlEnv* mlenv) {
         }
      }
  
-    if (checkAlNum (name.begin (), e) && name.length () < 128) {
+    if (matchAlNum (name.begin (), e) && name.length () < 128) {
         name = ustring (CharConst ("HTTP_")).append (name);
         char*  e = getenv (name.c_str ());
         if (e) {
diff --git a/modules/ml-config.cc b/modules/ml-config.cc

index 64b488f..ba63b2b 100644 (file)
--- a/modules/ml-config.cc
+++ b/modules/ml-config.cc
@@ -44,7 +44,7 @@ MNode*  ml_datastore_list (MNode* cell, MlEnv* mlenv) {
             name.assign (de->d_name);
  #endif
             t.assign (CharConst (cDataTop kDS)).append (name).append (CharConst (kSubStore));
-           if (checkName (name) && isDirectory (t)) {
+           if (matchName (name) && isDirectory (t)) {
                 ans.append (newMNode_str (new ustring (name)));
             }
         }
diff --git a/modules/ml-cookielogin.cc b/modules/ml-cookielogin.cc

index ed8bd9f..adc7bc8 100644 (file)
--- a/modules/ml-cookielogin.cc
+++ b/modules/ml-cookielogin.cc
@@ -9,6 +9,7 @@
  #include "util_check.h"
  #include "util_random.h"
  #include "util_string.h"
+#include "util_splitter.h"
  #include "util_time.h"
  #include "sigsafe.h"
  #include "bdbmacro.h"
@@ -27,52 +28,18 @@
  */
  
  static void  splitRec (ustring& rec, ustring& id, ustring& limit, ustring& avail, ustring& group, ustring& ip) {
-    uiterator  b = rec.begin ();
-    uiterator  e = rec.end ();
-    umatch  m;
-
-    if (b == e || ! usearch (b, e, m, re_colon)) {
-       id.assign (b, e);
-       goto Ex2;
-    }
-    id.assign (b, m[0].first);
-    b = m[0].second;
-    if (b == e || ! usearch (b, e, m, re_colon)) {
-       limit.assign (b, e);
-       goto Ex3;
-    }
-    limit.assign (b, m[0].first);
-    b = m[0].second;
-    if (b == e || ! usearch (b, e, m, re_colon)) {
-       avail.assign (b, e);
-       goto Ex4;
-    }
-    avail.assign (b, m[0].first);
-    b = m[0].second;
-    if (b == e || ! usearch (b, e, m, re_colon)) {
-       group.assign (b, e);
-       goto Ex5;
-    }
-    group.assign (b, m[0].first);
-    b = m[0].second;
-    if (b == e || ! usearch (b, e, m, re_colon)) {
-       ip.assign (b, e);
-    } else {
-       ip.assign (b, m[0].first);
-    }
-    return;
-
-// Ex1:
-//    id.resize (0);
- Ex2:
-    limit.resize (0);
- Ex3:
-    avail.resize (0);
- Ex4:
-    group.resize (0);
- Ex5:
-    ip.resize (0);
-
+    SplitterCh  sp (rec, ':');
+
+    sp.nextSep ();
+    id = sp.pre ();
+    sp.nextSep ();
+    limit = sp.pre ();
+    sp.nextSep ();
+    avail = sp.pre ();
+    sp.nextSep ();
+    group = sp.pre ();
+    sp.nextSep ();
+    ip = sp.pre ();
      return;
  }
  
@@ -151,7 +118,7 @@ MNode*  ml_cookielogin (MNode* cell, MlEnv* mlenv) {
  
      if (name.size () == 0)
         throw (uErrorFilenameEmpty);
-    if (! checkName (name))
+    if (! matchName (name))
         throw (name + uErrorBadName);
      if (mlenv->env) {
         SigSafe  sig;
@@ -236,7 +203,7 @@ MNode*  ml_cookielogin_login (MNode* cell, MlEnv* mlenv, MLFunc* mobj) {
         r.append (clipColon (id)).append (uColon);
         limit = now () + avail;
         r.append (to_ustring (limit)).append (uColon).append (to_ustring (avail)).append (uColon).append (clipColon (group));
-       if (ip.size () > 0 && checkIP (ip)) {
+       if (ip.size () > 0 && matchIP (ip)) {
             r.append (uColon).append (ip);      // IP
         } else {
             r.append (uColon);
@@ -377,7 +344,6 @@ MNode*  ml_cookielogin_delete (MNode* cell, MlEnv* mlenv, MLFunc* mobj) {
      MNode*  arg = cell->cdr ();
      MLCookieLogin*  obj = MObjRef<MLCookieLogin> (mobj, cMLCookieLoginID);
      ustring  key, val;
-    umatch  m;
      ustring  id;
      std::vector<ustring>  keys;
      std::vector<ustring>::iterator  it;
@@ -392,8 +358,9 @@ MNode*  ml_cookielogin_delete (MNode* cell, MlEnv* mlenv, MLFunc* mobj) {
      obj->opendb ();
      obj->db.initeach ();
      while (obj->db.each (key, val)) {
-       if (usearch (val, m, re_colon)) {
-           if (match (val.begin (), m[0].first, id)) {
+       SplitterCh  sp (val, ':');
+       if (sp.nextSep ()) {    // セパレータが存在するとき、idを含むレコード
+           if (sp.pre () == id) {
                 keys.push_back (key);
             }
         }
diff --git a/modules/ml-formvar.cc b/modules/ml-formvar.cc

index bda1bb3..a030d81 100644 (file)
--- a/modules/ml-formvar.cc
+++ b/modules/ml-formvar.cc
@@ -343,7 +343,6 @@ MNode*  ml_formvar_input_file (MNode* cell, MlEnv* mlenv) {
      ustring  var;
      ustring  fname;
      FormVarOp  opt;
-//    ustring  val;
      ustring  tgt;
      ustring  filename;
      int  idx;
@@ -384,14 +383,10 @@ MNode*  ml_formvar_input_file (MNode* cell, MlEnv* mlenv) {
      if (mlenv->env->storedir.empty ()) {
         newStoreSerial (mlenv);
      }
-//    mlenv->env->form->fileAt (var, val);
-//    if (val.size () > 0) {
-//     i = strtoul (val);
      idx = mlenv->env->form->at (var, filename);
      idx = mlenv->env->form->partAt (idx);
      if (idx >= 0) {
         tgt = mlenv->env->path_store_file (fname);
-//     mlenv->env->form->at (var, filename);
         if (opt.filter.size () > 0) {
             if (wsearch_env (mlenv->regenv, filename, opt.filter)) {
                 filename.assign (mlenv->regenv.regmatch[0].first, mlenv->regenv.regmatch[0].second);
@@ -438,7 +433,7 @@ MNode*  ml_formvar_input_file (MNode* cell, MlEnv* mlenv) {
  
  /*DOC:
  ===input-file@===
- (input-file@ ARRAY FILE_PREFIX) -> LIST_of_a_Pair_of_SavedFileName_and_OriginalFileName
+ (input-file@ ARRAY FILE_PREFIX) -> LIST_of_a_List_of_SavedFileName_OriginalFileName_and_Type
  
  */
  //#AFUNC       input-file@     ml_formvar_input_file_a
@@ -453,7 +448,6 @@ MNode*  ml_formvar_input_file_a (MNode* cell, MlEnv* mlenv) {
      size_t  i, n;
      int  idx;
      ustring  tgt;
-//    ustring  val;
      ustring  si;
      ustring  filename;
      ustring  tgtname;
@@ -461,18 +455,14 @@ MNode*  ml_formvar_input_file_a (MNode* cell, MlEnv* mlenv) {
      setParams (arg, 2, &params, NULL, NULL, NULL);
      name = eval_str (params[0], mlenv);
      prefix = eval_str (params[1], mlenv);
-//    prefix.append (uDash);
  
      if (mlenv->env->storedir.empty ()) {
         newStoreSerial (mlenv);
      }
      target0 = mlenv->env->path_store_file (prefix);
  
-//    n = mlenv->env->form->fileAtSize (name);
      n = mlenv->env->form->atSize (name);
      for (i = 0; i < n; i ++) {
-//     mlenv->env->form->fileAt (name, i, val);
-//     if (val.size () > 0) {
         idx = mlenv->env->form->at (name, i, filename);
  #ifdef DEBUG2
         std::cerr << "i:" << idx << " ";
@@ -485,13 +475,13 @@ MNode*  ml_formvar_input_file_a (MNode* cell, MlEnv* mlenv) {
             si.assign (to_ustring (i + 1));
             tgt.assign (target0).append (si);
             tgtname.assign (prefix).append (si);
-//         mlenv->env->form->at (name, i, filename);
             // ++ filter
             if (filename.length () > 0) {
                 MNodeList  l;
                 mlenv->env->form->saveFile (idx, tgt, 0);       // ++ opt.max
                 l.append (newMNode_str (new ustring (tgtname)));
                 l.append (newMNode_str (new ustring (filename)));
+               l.append (newMNode_str (new ustring (mlenv->env->form->typeAt (idx))));
                 ans.append (l.release ());
  #ifdef DEBUG
                 if (mlenv->log) {
diff --git a/modules/ml-http.cc b/modules/ml-http.cc

index 6409ebb..ac031d0 100644 (file)
--- a/modules/ml-http.cc
+++ b/modules/ml-http.cc
@@ -413,7 +413,7 @@ MNode*  ml_build_url (MNode* cell, MlEnv* mlenv) {
  
      if (! checkScheme (scheme))
         throw (scheme + ": bad scheme.");
-    if (! checkHostname (host))
+    if (! matchHostname (host))
         throw (host + ": bad hostname.");
  
      ans = new ustring;
@@ -580,7 +580,7 @@ MNode*  ml_hostnamep (MNode* cell, MlEnv* mlenv) {
      if (arg)
         throw (uErrorWrongNumber);
  
-    return newMNode_bool (checkHostname (hostname));
+    return newMNode_bool (matchHostname (hostname));
  }
  
  /*DOC:
@@ -725,7 +725,7 @@ MNode*  ml_http_get (MNode* cell, MlEnv* mlenv) {
         obj.http->rawquery = to_string (t ());
      if (evkw (26, t)) {
         obj.http->querytype = to_string (t ());
-       if (!checkASCII (obj.http->querytype))
+       if (! matchASCII (obj.http->querytype.begin (), obj.http->querytype.end ()))
             throw (obj.http->querytype + ustring (CharConst (": bad type")));
      }
      if (evkw (27, t))          // raw-file-serial
diff --git a/modules/ml-motor.cc b/modules/ml-motor.cc

index 347f706..d485e2a 100644 (file)
--- a/modules/ml-motor.cc
+++ b/modules/ml-motor.cc
@@ -52,7 +52,7 @@ MNode*  ml_output_header (MNode* cell, MlEnv* mlenv) {
  
      if (type.empty ())
         throw (ustring (CharConst ("missing type.")));
-    else if (! checkMimeType (type))
+    else if (! matchMimeType (type))
         type = mimetype (type);
  
      if (! mlenv->env->responseDone) {
@@ -107,7 +107,7 @@ MNode*  ml_motor_file (MNode* cell, MlEnv* mlenv) {
  #endif
      if (evkw (0, t)) {                         // type
         type = t.to_asciiword ();
-       if (! checkMimeType (type))
+       if (! matchMimeType (type))
             type.resize (0);
      }
      if (keywords[1])                           // error
diff --git a/modules/ml-neon.cc b/modules/ml-neon.cc

index 942bfe3..a4e9246 100644 (file)
--- a/modules/ml-neon.cc
+++ b/modules/ml-neon.cc
@@ -330,7 +330,7 @@ void  NeonSession::setNoVerify () {
  }
  
  void  NeonSession::setProxy (const ustring& host, int port) {
-    if (checkHostname (host) && port > 0 && port < 65536) {
+    if (matchHostname (host) && port > 0 && port < 65536) {
  #ifdef DEBUG2
         std::cerr << "set proxy " << host << ":" << port << "\n";
  #endif /* DEBUG */
@@ -813,7 +813,7 @@ MNode*  ml_neon (MNode* cell, MlEnv* mlenv) {
      evkw (4, errfn);                   // 4:on-error
      evkw_bool (5, obj.fnoverify);      // 5:no-verify
  
-    if (! checkHostname (obj.host))
+    if (! matchHostname (obj.host))
         throw (obj.host + ": bad hostname.");
      if (obj.port <= 0 || obj.port >= 65536)
         throw (to_ustring (obj.port) + ": bad port number.");
@@ -994,7 +994,7 @@ MNode*  ml_neon_http_request (MNode* cell, MlEnv* mlenv, MLFunc* mobj) {
         obj->query->rawquery.srcStatic (to_string (t ()));
      if (evkw (16, t)) {                // 16:query-type
         obj->query->querytype = to_string (t ());
-       if (!checkASCII (obj->query->querytype))
+       if (! matchASCII (obj->query->querytype.begin (), obj->query->querytype.end ()))
             throw (obj->query->querytype + ustring (CharConst (": bad type")));
      }
      evkw (17, obj->query->cookie);     // 17:cookie
diff --git a/modules/ml-sendmail.cc b/modules/ml-sendmail.cc

index 6008357..19e82a8 100644 (file)
--- a/modules/ml-sendmail.cc
+++ b/modules/ml-sendmail.cc
@@ -16,6 +16,7 @@
  #include "util_check.h"
  #include "util_file.h"
  #include "util_string.h"
+#include "util_splitter.h"
  #include "util_proc.h"
  #include "util_time.h"
  #include "ustring.h"
@@ -69,8 +70,6 @@ static void  sendmail (const ustring& text, const ustring& faddr, std::vector<us
      MotorOutputString  out;
  #endif
      HTMLMotor  motor;
-    uiterator  b, e;
-    umatch  m, m2;
      SendmailHdr  hdr;
      ustring  line;
      ustring  t;
@@ -78,7 +77,6 @@ static void  sendmail (const ustring& text, const ustring& faddr, std::vector<us
      char**  argv;
      int  i;
      FILE*  fd;
-    static uregex  re_white ("^[ \\t]+");
  
      if (text.size () == 0) 
         return;
@@ -88,23 +86,20 @@ static void  sendmail (const ustring& text, const ustring& faddr, std::vector<us
         motor.compile (text);
         motor.output (&out, mlenv->env);
      }
-    b = out.ans.begin ();
-    e = out.ans.end ();
+    SplitterNL  sp (out.ans);
      line.resize (0);
-    while (usearch (b, e, m, re_nl)) {
-       if (b == m[0].first) {
-           b = m[0].second;
+    while (sp.next ()) {
+       if (sp.b == sp.t) {
             break;
         }
-       if (usearch (b, m[0].first, m2, re_white)) {
+       if (*sp.b == ' ' || *sp.b == '\t') {
             line.append (uLF);
-           line.append (b, m[0].first);
+           line.append (sp.pre ());
         } else {
             if (line.size () > 0)
                 hdr.line (line);
-           line = ustring (b, m[0].first);
+           line = ustring (sp.pre ());
         }
-       b = m[0].second;
      }
      if (line.size () > 0)
         hdr.line (line);
@@ -153,15 +148,10 @@ static void  sendmail (const ustring& text, const ustring& faddr, std::vector<us
  
      fwrite ("\n", 1, 1, fd);
  
-    while (usearch (b, e, m, re_nl)) {
-       if (b != m[0].first)
-           fwrite (&b[0], sizeof (ustring::value_type), m[0].first - b, fd);
+    while (sp.next ()) {
+       if (sp.b < sp.t)
+           fwrite (&sp.b[0], sizeof (ustring::value_type), sp.t - sp.b, fd);
         fwrite ("\n", 1, 1, fd);
-       b = m[0].second;
-    }
-    if (b != e) {
-       fwrite (&b[0], sizeof (ustring::value_type), e - b, fd);
-
      }
      fclose (fd);
  }
diff --git a/modules/ml-store.cc b/modules/ml-store.cc

index 6d93395..956e4ee 100644 (file)
--- a/modules/ml-store.cc
+++ b/modules/ml-store.cc
@@ -1109,7 +1109,7 @@ MNode*  ml_response_motor (MNode* cell, MlEnv* mlenv) {
      
      if (type.empty ()) {
         type = mimetype (getExt (src));
-    } else if (! checkMimeType (type)) {
+    } else if (! matchMimeType (type)) {
         type = mimetype (type);
      }
  
@@ -1195,7 +1195,7 @@ MNode*  ml_response_file (MNode* cell, MlEnv* mlenv) {
         } else {
             type = mimetype (getExt (src));
         }
-    } else if (! checkMimeType (type)) {
+    } else if (! matchMimeType (type)) {
         type = mimetype (type);
      }
  
diff --git a/modules/ml-string.cc b/modules/ml-string.cc

index 1361906..5e2d5c4 100644 (file)
--- a/modules/ml-string.cc
+++ b/modules/ml-string.cc
@@ -1126,7 +1126,8 @@ MNode*  ml_is_ascii63 (MNode* cell, MlEnv* mlenv) {
      if (arg)
         throw (uErrorWrongNumber);
  
-    ans = checkASCII (text);
+//    ans = checkASCII (text);
+    ans = matchASCII (text.begin (), text.end ());
  
      return newMNode_bool (ans);
  }
diff --git a/modules/motor-function.cc b/modules/motor-function.cc

index 5e51f87..828542a 100644 (file)
--- a/modules/motor-function.cc
+++ b/modules/motor-function.cc
@@ -199,6 +199,7 @@ void  mf_date (const std::vector<ustring>& args, MlEnv* mlenv) {
      }
  }
  
+#if 0
  /*DOC:
  ===doarray===
   [[doarray:VARIABLE,...[:VARIABLE] TEXT...]]
@@ -217,3 +218,4 @@ void  mf_doarray (const std::vector<ustring>& args, const MotorObj::MotorObjVec&
      
      //****
  }
+#endif
diff --git a/wiki/wikiattrib.cc b/wiki/wikiattrib.cc

index 1ae61ad..f6b5b09 100644 (file)
--- a/wiki/wikiattrib.cc
+++ b/wiki/wikiattrib.cc
@@ -266,7 +266,7 @@ bool  WikiAttrib1::paramID (const ustring& key, WikiMotorObjVec& vval, bool& fer
  
  void  WikiAttrib1::paramIDValue (const ustring& key, WikiMotorObjVec& vval, ustring& var, bool& ferr) {
      ustring  value (vval.textOut (wiki));
-    if (checkWikiID (value)) {
+    if (matchWikiID (value)) {
         var = value;
         ferr = false;
      } else {
@@ -291,7 +291,7 @@ void  WikiAttrib1::paramClassValue (WikiMotorObjVec& vval, std::vector<ustring>&
      for (int i = 0; i < args.size (); i ++) {
         ustring  value (args[i]->textOut (wiki));
         if (value.length () > 0) {
-           if (checkWikiID (value)) {
+           if (matchWikiID (value)) {
                 var.push_back (value);
             } else {
                 wiki->errorMsg.append (value).append (CharConst (": bad class name\n"));
@@ -321,7 +321,7 @@ bool  WikiAttrib1::paramHeight (const ustring& key, WikiMotorObjVec& vval, ustri
  
  void  WikiAttrib1::paramWidthValue (const ustring& key, WikiMotorObjVec& vval, ustring& var, bool& ferr) {
      ustring  value (vval.textOut (wiki));
-    if (checkWidth (value)) {
+    if (matchWidth (value)) {
         var = value;
         ferr = false;
      } else {
@@ -333,7 +333,7 @@ void  WikiAttrib1::paramWidthValue (const ustring& key, WikiMotorObjVec& vval, u
  bool  WikiAttrib1::paramSize (const char* name, size_t namelen, const ustring& key, WikiMotorObjVec& vval, ustring& var, bool& ferr) {
      if (match (key, name, namelen)) {
         ustring  value (vval.textOut (wiki));
-       if (checkNum (value)) {
+       if (matchNum (value)) {
             var = value;
             ferr = false;
         } else {
@@ -345,7 +345,7 @@ bool  WikiAttrib1::paramSize (const char* name, size_t namelen, const ustring& k
  }
  
  void  WikiAttrib1::paramUNum (const ustring& value, int& var, const ustring& name) {
-    if (checkNum (value)) {
+    if (matchNum (value)) {
         var = strtoul (value);
      } else {
         wiki->errorMsg.append (name).append (uEq).append (value).append (uErrorBadValue).append (uLF);
@@ -365,7 +365,7 @@ bool  WikiAttrib1::paramTargetCheck (const ustring& key) {
  }
  
  void  WikiAttrib1::paramTargetBody (const ustring& key, const ustring& value, ustring& var, bool& ferr) {
-    if (value.length () == 0 || checkWikiID (value)) {
+    if (value.length () == 0 || matchWikiID (value)) {
         var = value;
      } else {
         if (key.length () > 0)
@@ -394,9 +394,9 @@ bool  WikiAttrib1::paramOnChangeCheck (const ustring& name) {
  
  #ifdef BOOTSTRAPHACK
  bool  WikiAttrib1::paramDataPrefix (const ustring& key, WikiMotorObjVec& vval, bool& ferr) {
-    if (matchHead (key, CharConst ("data-")) && checkWikiID (key)) {
+    if (matchHead (key, CharConst ("data-")) && matchWikiID (key)) {
         ustring  value (vval.textOut (wiki));
-       if (checkWikiID (value)) {
+       if (matchWikiID (value)) {
             datapre.push_back (std::pair<ustring,ustring> (key, value));
             ferr = false;
         } else {
@@ -582,10 +582,10 @@ void  WikiAttribTable::outputMore (MotorOutput* out) {
  /* ============================================================ */
  bool  WikiAttribImg::readAttribMore (const ustring& key, WikiMotorObjVec& vval, bool& ferr) {
      if (paramWidth (key, vval, width, ferr)) {
-       if (checkNum (width))
+       if (matchNum (width))
             width.append (CharConst ("px"));
      } else if (paramHeight (key, vval, height, ferr)) {
-       if (checkNum (height))
+       if (matchNum (height))
             height.append (CharConst ("px"));
      } else if (match (key, CharConst ("alt"))) {
         alt = vval.textOut (wiki);
@@ -616,7 +616,7 @@ bool  WikiAttribInput::readAttribMore (const ustring& key, WikiMotorObjVec& vval
         ustring  v = vval.textOut (wiki);
         if (match (v, CharConst ("*"))) {
             elsize = 1;
-       } else if (checkNum (v)) {
+       } else if (matchNum (v)) {
             elsize = to_int32 (v);
             if (elsize < 0 || elsize > 999) {
                 elsize = 1;
@@ -679,7 +679,7 @@ void  WikiAttribInput::outputMore (MotorOutput* out) {
      wiki->outputName (out, CharConst ("size"), psize);
      wiki->outputName (out, CharConst ("size"), elsize);
      if (pwidth.size () > 0) {
-       if (checkNum (pwidth)) {
+       if (matchNum (pwidth)) {
             out->out_raw (CharConst (" style=\"width:"))->out_toHTML_noCtrl (pwidth)->out_raw (CharConst ("px;\""));
         } else {
             out->out_raw (CharConst (" style=\"width:"))->out_toHTML_noCtrl (pwidth)->out_raw (CharConst (";\""));
diff --git a/wiki/wikicmd.cc b/wiki/wikicmd.cc

index 2916aa9..03287d9 100644 (file)
--- a/wiki/wikicmd.cc
+++ b/wiki/wikicmd.cc
@@ -389,6 +389,16 @@ void  wc_evalblock (WikiLine* wl, WikiFormat* wiki) {
  }
  
  /* ============================================================ */
+static bool  matchSkipEqs (uiterator& b, uiterator e) {
+    // Advance b past a leading run of '='; returns false (b untouched) when there is none.
+    if (! (b < e) || *b != '=')
+       return false;
+    ++ b;
+    while (b < e && *b == '=')
+       ++ b;
+    return true;
+}
+
  /*DOC:
  ===$insert===
   $insert:VARIABLE
@@ -411,8 +421,6 @@ void  wc_insert (WikiLine* wl, WikiFormat* wiki) {
      bool  super = false;
      bool  protect;
      int  i;
-    static uregex  re_eq ("=+");
-    umatch  m;
      
  #ifdef DEBUG
      std::cerr << "(wiki):" << ustring (wl->begin0, wl->end) << "\n";
@@ -427,14 +435,19 @@ void  wc_insert (WikiLine* wl, WikiFormat* wiki) {
      for (i = 1; i < args.size (); i ++) {
         if (! protect && match (args[i], CharConst ("superuser"))) {
             super = true;
-       } else if (usearch (args[i], m, re_eq)) {
-           hn = m[0].second - m[0].first;
-           if (hn < 0)
-               hn = 0;
-           if (hn > 5)
-               hn = 5;
         } else {
-           // bad parameter
+           uiterator  b = args[i].begin ();
+           uiterator  e = args[i].end ();
+           uiterator  p = b;
+           if (matchSkipEqs (b, e)) {
+               hn = b - p;
+               if (hn < 0)
+                   hn = 0;
+               if (hn > 5)
+                   hn = 5;
+           } else {
+               // bad parameter
+           }
         }
      }
      if (args.size () >= 1) {
diff --git a/wiki/wikienv.cc b/wiki/wikienv.cc

index 74756db..43023ab 100644 (file)
--- a/wiki/wikienv.cc
+++ b/wiki/wikienv.cc
@@ -3,8 +3,6 @@
  #include "ml.h"
  #include "ustring.h"
  
-uregex  re_wikicmdsep ("(:)|([ \t]+$)");
-
  void  MacroVar::setVar (const ustring& name, MNode* var, WikiLine::linevec* wl) {
      std::pair<MacroVar::iterator, bool>  x;
      erase (name);
diff --git a/wiki/wikienv.h b/wiki/wikienv.h

index 0dbbbac..cf95822 100644 (file)
--- a/wiki/wikienv.h
+++ b/wiki/wikienv.h
@@ -27,6 +27,4 @@ class  WikiEnv {
      virtual  ~WikiEnv () {};
  };
  
-extern uregex  re_wikicmdsep;
-
  #endif /* WIKIENV_H */
diff --git a/wiki/wikiformat.cc b/wiki/wikiformat.cc

index 43435c7..1530747 100644 (file)
--- a/wiki/wikiformat.cc
+++ b/wiki/wikiformat.cc
@@ -1536,29 +1536,56 @@ void  WikiBlockRaw::output (MotorOutput* out) {
  
  /* ============================================================ */
  void  WikiFormat::pass1 (const ustring& text, WikiLine::linevec* block, bool fsuper) {
-    Splitter  sp (text, re_nl);
+    SplitterNL  sp (text);
  
      pass1_1 (sp, NULL, NULL, block, NULL, NULL, NULL, fsuper);
  }
  
+static bool  findCmdSep (uiterator& b, uiterator e, uiterator& u) {    // Scan [b, e) for the first ':' or blank; on return b = start of the separator, u = just past it.
+    int  c;
+    uiterator  p = b;
+    for (; p < e; ++ p) {
+       c = *p;
+       if (c == ':') {                 // ':' separator is exactly one character wide
+           b = p;
+           u = b + 1;
+           return true;
+       } else if (c == ' ' || c == '\t') {     // NOTE(review): a blank anywhere ends the scan; the regex this replaces, "(:)|([ \t]+$)", had its blank alternative anchored with '$' -- confirm this is intended
+           b = p;
+           u = b;
+           do {                        // swallow the whole run of spaces/tabs
+               ++ u;
+           } while (u < e && ((c = *u) == ' ' || c == '\t'));
+           return true;
+       }
+    }
+    b = p;                             // no separator found: b and u both end up at e
+    u = e;
+    return true;                       // every path returns true, so a caller's "not found" branch is never taken
+}
+
  int  WikiFormat::pass1_1 (Splitter& sp, ustring* elseword, ustring* endword, WikiLine::linevec* block, uiterator* elsebegin0, uiterator* elsebegin, uiterator* elseend, bool fsuper) {
      uiterator  b, e, t, u, v;
-    umatch  m;
+//    umatch  m;
+//    static uregex  re_wikicmdsep ("(:)|([ \t]+$)");
  
      while (sp.next ()) {
         b = sp.begin ();
         e = sp.end ();
-       while (b < e && b[0] == '\t')
+       while (b < e && b[0] == '\t')   // TABを無視
             b ++;
         if (matchSkip (b, e, CharConst (kComment))) {
             // comment
         } else if (b != e && b[0] == kWikiCmd) {
-           if (usearch (b, e, m, re_wikicmdsep)) {
-               t = b;
-               u = m[0].first;
-               v = m[0].second;
+//         if (usearch (b, e, m, re_wikicmdsep)) {
+//             t = b;
+//             u = m[0].first;
+//             v = m[0].second;
+           t = b;
+           u = b;
+           if (findCmdSep (u, e, v)) {
             } else {
-               t = b;
+//             t = b;
                 u = e;
                 v = e;
             }
@@ -1867,7 +1894,6 @@ void  WikiFormat::compileLine (WikiLineScanner& scanner) {
             blockp->push_back (cur);
             cur->addLine (b, e);
             push_block (&obj->block);
-//     } else if (curform == NULL && matchHead (b, e, CharConst ("{form:"))) {
         } else if (curform == NULL && matchHead (b, e, CharConst (uWikiFORM))) {
             WikiBlockComplex*  obj;
             if (cur)
diff --git a/wiki/wikiline.cc b/wiki/wikiline.cc

index 576df63..37244d4 100644 (file)
--- a/wiki/wikiline.cc
+++ b/wiki/wikiline.cc
@@ -378,6 +378,28 @@ bool  wl_color (WikiMotorObjVecVec* args, WikiMotorObjVec* arg2, WikiMotorObjVec
      return true;
  }
  
+static bool  matchAnchor (uiterator b, uiterator e) {
+    int  c;
+    static char  table_anchor[] = {    // [a-zA-Z0-9_\-]
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 
+       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 
+       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
+    };
+    for (; b < e; ++ b) {
+       c = *b;
+       if (0 <= c && c < 128 && table_anchor[c]) {
+       } else {
+           return false;
+       }
+    }
+    return true;
+}
+
  /*DOC:
  ===アンカー===
   [[anchor:Fig1]]
@@ -386,11 +408,9 @@ bool  wl_color (WikiMotorObjVecVec* args, WikiMotorObjVec* arg2, WikiMotorObjVec
  //#WIKILINE    anchor  wl_anchor
  bool  wl_anchor (WikiMotorObjVec* arg, WikiMotorObjVec& out, WikiFormat* wiki) {
      ustring  name (arg->textOut (wiki));
-    umatch  m;
-    static uregex  re ("^[a-zA-Z0-9_-]+$");
      MotorOutputString  html;
  
-    if (! usearch (name, m, re))
+    if (! matchAnchor (name.begin (), name.end ()))
         return false;
      html.out_raw (CharConst ("<a"));
      wiki->outputName (&html, CharConst ("name"), name, false);
diff --git a/wiki/wikimotor.cc b/wiki/wikimotor.cc

index 18c4fe3..c29930f 100644 (file)
--- a/wiki/wikimotor.cc
+++ b/wiki/wikimotor.cc
@@ -383,7 +383,7 @@ bool  WikiMotorObjVec::splitURL (WikiFormat* wiki, ustring& proto, ustring& host
                 vpath.push_back (WikiMotorObjPtr (new WikiMotorObjText (uSlash)));
                 vec.splitChar ('/', vhost, vpath);
                 host = vhost.textOut (wiki);
-               if (! checkHostname (host)) {
+               if (! matchHostname (host)) {
                     wiki->errorMsg.append (host).append (CharConst (": bad hostname.\n"));
                     host = uEmpty;
                 }
@@ -400,7 +400,7 @@ bool  WikiMotorObjVec::splitURL (WikiFormat* wiki, ustring& url) {
      ustring  proto, host, path, params, anchor;
  
      if (splitURL (wiki, proto, host, path, params, anchor)) {
-       if (checkHostname (host)) {
+       if (matchHostname (host)) {
             url.assign (proto).append (CharConst ("://")).append (host).append (path);
             if (params.length () > 0)
                 url.append (CharConst ("?")).append (params);
@@ -436,7 +436,7 @@ bool  WikiMotorObjVec::splitURL_2 (WikiFormat* wiki, ustring& host, ustring& pat
             vpath.push_back (WikiMotorObjPtr (new WikiMotorObjText (uSlash)));
             fsp = vec.splitChar ('/', vhost, vpath);
             host = vhost.textOut (wiki);
-           if (! checkHostname (host)) {
+           if (! matchHostname (host)) {
                 wiki->errorMsg.append (host).append (CharConst (": bad hostname.\n"));
                 host = uEmpty;
             }
@@ -454,7 +454,7 @@ bool  WikiMotorObjVec::splitURL_2 (WikiFormat* wiki, const ustring& proto, ustri
      ustring  host, path, params, anchor;
  
      if (splitURL_2 (wiki, host, path, params, anchor)) {
-       if (checkHostname (host)) {
+       if (matchHostname (host)) {
             url.assign (proto).append (CharConst ("://")).append (host).append (path);
             if (params.length () > 0)
                 url.append (CharConst ("?")).append (params);
@@ -476,7 +476,7 @@ bool  WikiMotorObjVec::splitURL_3 (WikiFormat* wiki, ustring& port, ustring& pat
      vpath.push_back (WikiMotorObjPtr (new WikiMotorObjText (uSlash)));
      if (splitChar ('/', vport, vpath)) {
         ustring  v = vport.textOut (wiki);
-       if (checkNum (v)) {
+       if (matchNum (v)) {
             int n = strtoul (v);
             if (1 <= n && n < 65536) {
                 port = v;
diff --git a/wiki/wikimotor.h b/wiki/wikimotor.h

index e9d314e..a20d985 100644 (file)
--- a/wiki/wikimotor.h
+++ b/wiki/wikimotor.h
@@ -416,7 +416,7 @@ class  WikiMotor {
      static const int TMATCH_BAR2 = 0x10;
  
      WikiFormat*  wiki;
-    Splitter  sp;
+    SplitterRe  sp;
      boost::ptr_vector<WikiMotorObj>  z;
  
      WikiMotor (uiterator b, uiterator e, WikiFormat* w): sp (b, e, re_wiki1) {
author	visor <visor@users.sourceforge.jp>
	Mon, 16 Mar 2015 15:44:30 +0000 (00:44 +0900)
committer	visor <visor@users.sourceforge.jp>
	Tue, 17 Mar 2015 13:11:15 +0000 (22:11 +0900)
cgi/main.cc		patch \| blob \| history
ext/ml-sqlite3.cc		patch \| blob \| history
ext/ml-tcpserver.cc		patch \| blob \| history
lib/app.cc		patch \| blob \| history
lib/expr.cc		patch \| blob \| history
lib/form.cc		patch \| blob \| history
lib/formfile.cc		patch \| blob \| history
lib/formfile.h		patch \| blob \| history
lib/http.cc		patch \| blob \| history
lib/http.h		patch \| blob \| history
lib/ml.cc		patch \| blob \| history
lib/ml.h		patch \| blob \| history
lib/motor.cc		patch \| blob \| history
lib/motorenv.cc		patch \| blob \| history
lib/motoroutput.h		patch \| blob \| history
lib/ustring.h		patch \| blob \| history
lib/utf8.cc		patch \| blob \| history
lib/utf8.h		patch \| blob \| history
lib/util_apache.cc		patch \| blob \| history
lib/util_check.cc		patch \| blob \| history
lib/util_check.h		patch \| blob \| history
lib/util_const.cc		patch \| blob \| history
lib/util_const.h		patch \| blob \| history
lib/util_file.cc		patch \| blob \| history
lib/util_random.cc		patch \| blob \| history
lib/util_regex.h	[new file with mode: 0644]	patch \| blob
lib/util_splitter.h		patch \| blob \| history
lib/util_string.cc		patch \| blob \| history
lib/util_string.h		patch \| blob \| history
lib/util_tcp.cc		patch \| blob \| history
lib/util_tcp.h		patch \| blob \| history
modules/ml-addon.cc		patch \| blob \| history
modules/ml-apache.cc		patch \| blob \| history
modules/ml-config.cc		patch \| blob \| history
modules/ml-cookielogin.cc		patch \| blob \| history
modules/ml-formvar.cc		patch \| blob \| history
modules/ml-http.cc		patch \| blob \| history
modules/ml-motor.cc		patch \| blob \| history
modules/ml-neon.cc		patch \| blob \| history
modules/ml-sendmail.cc		patch \| blob \| history
modules/ml-store.cc		patch \| blob \| history
modules/ml-string.cc		patch \| blob \| history
modules/motor-function.cc		patch \| blob \| history
wiki/wikiattrib.cc		patch \| blob \| history
wiki/wikicmd.cc		patch \| blob \| history
wiki/wikienv.cc		patch \| blob \| history
wiki/wikienv.h		patch \| blob \| history
wiki/wikiformat.cc		patch \| blob \| history
wiki/wikiline.cc		patch \| blob \| history
wiki/wikimotor.cc		patch \| blob \| history
wiki/wikimotor.h		patch \| blob \| history