From af3c6d248b1a6698aba2875ca708a9610fa861d0 Mon Sep 17 00:00:00 2001 From: visor Date: Tue, 17 Mar 2015 00:44:30 +0900 Subject: [PATCH] reduce regex's. --- cgi/main.cc | 19 ++- ext/ml-sqlite3.cc | 24 ++- ext/ml-tcpserver.cc | 5 +- lib/app.cc | 24 +-- lib/expr.cc | 4 +- lib/form.cc | 2 +- lib/formfile.cc | 119 ++++++++++--- lib/formfile.h | 3 + lib/http.cc | 65 ++++---- lib/http.h | 2 +- lib/ml.cc | 256 +++++++++++++++++++++------- lib/ml.h | 3 +- lib/motor.cc | 57 +------ lib/motorenv.cc | 12 +- lib/motoroutput.h | 1 + lib/ustring.h | 11 +- lib/utf8.cc | 57 ++++--- lib/utf8.h | 1 + lib/util_apache.cc | 8 +- lib/util_check.cc | 77 +++++---- lib/util_check.h | 47 +++--- lib/util_const.cc | 14 -- lib/util_const.h | 13 -- lib/util_file.cc | 18 +- lib/util_random.cc | 1 + lib/util_regex.h | 17 ++ lib/util_splitter.h | 253 ++++++++++++++++++++++------ lib/util_string.cc | 413 +++++++++++++++++++++++++++++++++------------- lib/util_string.h | 22 ++- lib/util_tcp.cc | 2 +- lib/util_tcp.h | 1 + modules/ml-addon.cc | 78 ++++++--- modules/ml-apache.cc | 2 +- modules/ml-config.cc | 2 +- modules/ml-cookielogin.cc | 69 ++------ modules/ml-formvar.cc | 14 +- modules/ml-http.cc | 6 +- modules/ml-motor.cc | 4 +- modules/ml-neon.cc | 6 +- modules/ml-sendmail.cc | 30 ++-- modules/ml-store.cc | 4 +- modules/ml-string.cc | 3 +- modules/motor-function.cc | 2 + wiki/wikiattrib.cc | 24 +-- wiki/wikicmd.cc | 31 +++- wiki/wikienv.cc | 2 - wiki/wikienv.h | 2 - wiki/wikiformat.cc | 44 ++++- wiki/wikiline.cc | 26 ++- wiki/wikimotor.cc | 10 +- wiki/wikimotor.h | 2 +- 51 files changed, 1238 insertions(+), 674 deletions(-) create mode 100644 lib/util_regex.h diff --git a/cgi/main.cc b/cgi/main.cc index ac816ba..a8b050c 100644 --- a/cgi/main.cc +++ b/cgi/main.cc @@ -14,6 +14,7 @@ #include "util_string.h" #include "util_file.h" #include "util_mimetype.h" +#include "util_splitter.h" #include "ustring.h" #include #include @@ -34,14 +35,18 @@ static ustring glue1 (MotorEnv* env) { p = getenvString 
(kPATH_INFO); { - uiterator b = p.begin (); - uiterator e = p.end (); - umatch m; - while (usearch (b, e, m, re_slash)) { - a.push_back (ustring (b, m[0].first)); - b = m[0].second; +// uiterator b = p.begin (); +// uiterator e = p.end (); +// umatch m; +// while (usearch (b, e, m, re_slash)) { +// a.push_back (ustring (b, m[0].first)); +// b = m[0].second; +// } +// a.push_back (ustring (b, e)); + SplitterCh sp (p, '/'); + while (sp.next ()) { + a.push_back (sp.pre ()); } - a.push_back (ustring (b, e)); } if (cwd) { scriptfilename = ustring (cwd); diff --git a/ext/ml-sqlite3.cc b/ext/ml-sqlite3.cc index 4521e98..bbf61c5 100644 --- a/ext/ml-sqlite3.cc +++ b/ext/ml-sqlite3.cc @@ -510,23 +510,19 @@ MNode* ml_sqlite3_rowid (MNode* cell, MlEnv* mlenv, MLFunc* mobj) { } static ustring escape_like (const ustring& str) { - static uregex re ("[%_\\\\]"); - Splitter sp (str, re); - ustring ans; - - while (sp.next ()) { - ans.append (sp.begin (), sp.end ()); - switch (*sp.matchBegin ()) { - case '%': - case '_': - case '\\': + SplitterChars sp (str, ustring (CharConst ("%_\\"))); + if (sp.nextSep ()) { + ustring ans; + do { + ans.append (sp.pre ()); ans.append (CharConst ("\\")); ans.append (sp.matchBegin (), sp.matchEnd ()); - break; - default:; - } + } while (sp.nextSep ()); + ans.append (sp.pre ()); + return ans; + } else { + return str; } - return ans; } /*DOC: diff --git a/ext/ml-tcpserver.cc b/ext/ml-tcpserver.cc index 9e511cc..22a413f 100644 --- a/ext/ml-tcpserver.cc +++ b/ext/ml-tcpserver.cc @@ -53,7 +53,8 @@ void MLDbTcpserver::addAllow (const ustring& key, time_t span, MNode* rest, MlE while (rest) { estr = eval_str (rest->car (), mlenv); nextNode (rest); - if (! checkASCII (estr) || estr.length () > 1024) +// if (! checkASCII (estr) || estr.length () > 1024) + if (! 
matchASCII (estr.begin (), estr.end ()) || estr.length () > 1024) throw (estr + uErrorBadValue); val.append (CharConst ("+")).append (estr).append (1, 0); } @@ -156,7 +157,7 @@ MNode* ml_dbtcpserver (MNode* cell, MlEnv* mlenv) { if (name.size () == 0) throw (uErrorFilenameEmpty); - if (! checkName (name)) + if (! matchName (name)) throw (name + uErrorBadName); if (mlenv->env) { SigSafe sig; diff --git a/lib/app.cc b/lib/app.cc index fe56c44..046ead7 100644 --- a/lib/app.cc +++ b/lib/app.cc @@ -61,51 +61,51 @@ void AppEnv::readOption (int argc, char** argv, MotorEnv* env) { p = argv[i]; if (cmp (p, "datastore:")) { datastore = ustring (p); - if (! checkName (datastore)) + if (! matchName (datastore)) throw (datastore + uErrorBadDatastore); } else if (cmp (p, "get-html:")) { getHtml = ustring (p); - if (getHtml != uDash && ! checkResourceName (getHtml)) + if (getHtml != uDash && ! matchResourceName (getHtml)) throw (getHtml + uErrorBadFile); } else if (cmp (p, "post-html:")) { postHtml = ustring (p); - if (postHtml != uDash && ! checkResourceName (postHtml)) + if (postHtml != uDash && ! matchResourceName (postHtml)) throw (postHtml + uErrorBadFile); } else if (cmp (p, "post-file-html:")) { postFileHtml = ustring (p); - if (postFileHtml != uDash && ! checkResourceName (postFileHtml)) + if (postFileHtml != uDash && ! matchResourceName (postFileHtml)) throw (postFileHtml + uErrorBadFile); } else if (cmp (p, "html:")) { postFileHtml = postHtml = getHtml = ustring (p); - if (getHtml != uDash && ! checkResourceName (getHtml)) + if (getHtml != uDash && ! matchResourceName (getHtml)) throw (getHtml + uErrorBadFile); } else if (cmp (p, "error-html:")) { errorHtml = ustring (p); - if (errorHtml != uDash && ! checkResourceName (errorHtml)) + if (errorHtml != uDash && ! matchResourceName (errorHtml)) throw (errorHtml + uErrorBadFile); } else if (cmp (p, "get-ml:")) { getML = ustring (p); - if (getML != uDash && ! checkResourceName (getML)) + if (getML != uDash && ! 
matchResourceName (getML)) throw (getML + uErrorBadFile); } else if (cmp (p, "post-ml:")) { postML = ustring (p); - if (postML != uDash && ! checkResourceName (postML)) + if (postML != uDash && ! matchResourceName (postML)) throw (postML + uErrorBadFile); } else if (cmp (p, "post-file-ml:")) { postFileML = ustring (p); - if (postFileML != uDash && ! checkResourceName (postFileML)) + if (postFileML != uDash && ! matchResourceName (postFileML)) throw (postFileML + uErrorBadFile); } else if (cmp (p, "ml:")) { postML = getML = ustring (p); - if (getML != uDash && ! checkResourceName (getML)) + if (getML != uDash && ! matchResourceName (getML)) throw (getML + uErrorBadFile); } else if (cmp (p, "type:")) { mimetype = ustring (p); - if (! checkMimeType (mimetype)) + if (! matchMimeType (mimetype)) throw (mimetype + ": bad mime type."); } else if (cmp (p, "to-code:")) { ocode = ustring (p); - if (! checkName (ocode)) + if (! matchName (ocode)) throw (ocode + ": bad encoding name."); } else if (cmp (p, "post-limit:")) { int num = boost::lexical_cast (p); diff --git a/lib/expr.cc b/lib/expr.cc index e6ed86f..d753fad 100644 --- a/lib/expr.cc +++ b/lib/expr.cc @@ -77,7 +77,7 @@ MNode* eval (MNode* cell, MlEnv* mlenv) { && (cell->cdr () == NULL || cell->cdr ()->isCons ())) { return callFunc (cell, mlenv); } else { - throw (cell->dump_string_short () + ustring (": error")); + throw (cell->dump_string_short () + ustring (CharConst (": error"))); } break; case MNode::MC_STR: @@ -221,7 +221,7 @@ bool eval_bool (MNode* cell, MlEnv* mlenv) { ustring eval_file (MNode* cell, MlEnv* mlenv) { ustring ans = eval_str (cell, mlenv); - if (! checkFilename (ans)) // XXX dummy + if (! 
matchFilename (ans)) // XXX dummy ans.resize (0); return ans; } diff --git a/lib/form.cc b/lib/form.cc index f8f3854..f49b121 100644 --- a/lib/form.cc +++ b/lib/form.cc @@ -176,7 +176,7 @@ int CGIForm::insert (map_t& mp, const ustring& name, const ustring& value) { int ans = -1; #ifdef STRICT_FORMVAR - if (checkName (name)) + if (matchName (name)) #else if (name.length () > 0 && name.length () < 64) #endif diff --git a/lib/formfile.cc b/lib/formfile.cc index 441a724..8c782d7 100644 --- a/lib/formfile.cc +++ b/lib/formfile.cc @@ -22,6 +22,19 @@ int CGIFormFile::partAt (int i) { } } +ustring CGIFormFile::typeAt (int i) { + if (i >= 0) { + tary_t::iterator it = typemap.find (i); + if (it == typemap.end ()) { + return uEmpty; + } else { + return it->second; + } + } else { + return uEmpty; + } +} + void CGIFormFile::read_multipart (MotorEnv* env) { #ifdef DEBUG2 std::cerr << "boundary:" << boundary << "\n"; @@ -123,8 +136,10 @@ void CGIFormFile::searchPart (MotorEnv* env) { fix (name); parts.push_back (part (b, x)); fix (filename); + fix (type); k1 = insert (iarg, name, filePart_osSafe (filename)); datamap.insert (sary_t::value_type (k1, k2)); + typemap.insert (tary_t::value_type (k2, type)); #ifdef DEBUG2 std::cerr << "insert(" << k1 << "," << k2 << ")\n"; #endif /* DEBUG */ @@ -180,10 +195,72 @@ void CGIFormFile::compileReg () { reN.assign (a); } +class ChSplitterNL { + public: + char* b; // 先頭 + char* t; // 区切り文字列先頭 + char* u; // 区切り文字列末尾 + char* e; // 末尾 + + ChSplitterNL (char* _begin, char* _end) { + b = t = u = _begin; + e = _end; + }; + ~ChSplitterNL () {}; + + bool isEnd () { + return b == e; + }; + ustring pre () { + return ustring (b, t); + }; + bool next () { + b = t = u; + if (b < e) { + if (findNL ()) { + } else { + t = u = e; + } + return true; + } else { + return false; + } + }; + bool nextSep () { + b = t = u; + if (b < e) { + if (findNL ()) { + return true; + } else { + t = u = e; + return false; + } + } else { + t = u = e; + return false; + } + }; + 
bool findNL () { + for (; t < e; ++ t) { + if (*t == '\n') { + u = t + 1; + return true; + } else if (*t == '\r') { + u = t + 1; + if (u < e && *u == '\n') + ++ u; + return true; + } + } + return false; + }; +}; + void CGIFormFile::readMimeHead (char*& b, char* e, ustring& disp, ustring& name, ustring& filename, ustring& type) { - boost::match_results m; +// boost::match_results m; + ChSplitterNL sp (b, e); boost::match_results m2; - char* x; +// char* x; static uregex re_disp1 ("^Content-Disposition:\\s*(.*);\\s*name=\"(.*)\";\\s*filename=\"(.*)\"$"); static uregex re_disp2 ("^Content-Disposition:\\s*(.*);\\s*name=\"(.*)\"$"); static uregex re_type ("^Content-Type:\\s*([a-zA-Z_0-9/.+-]*)(;\\s*(.*))?$"); @@ -192,42 +269,40 @@ void CGIFormFile::readMimeHead (char*& b, char* e, ustring& disp, ustring& name name.resize (0); filename.resize (0); type.resize (0); - while (b != e && regex_search (b, e, m, re_nl, boost::regex_constants::match_single_line)) { - x = m[0].first; +// while (b != e && regex_search (b, e, m, re_nl, boost::regex_constants::match_single_line)) { +// x = m[0].first; + while (sp.next ()) { #ifdef DEBUG2 - std::cerr << "line:" << ustring (b, x) << "\n"; +// std::cerr << "line:" << ustring (b, x) << "\n"; + std::cerr << "line:" << sp.pre () << "\n"; #endif /* DEBUG */ - if (b == x) { // empty line - b = m[0].second; +// if (b == x) { // empty line +// b = m[0].second; + if (sp.b == sp.t) { + b = sp.u; break; } - if (regex_search (b, x, m2, re_disp1, boost::regex_constants::match_single_line)) { +// if (regex_search (b, x, m2, re_disp1, boost::regex_constants::match_single_line)) { + if (regex_search (sp.b, sp.t, m2, re_disp1, boost::regex_constants::match_single_line)) { disp.assign (m2[1].first, m2[1].second - m2[1].first); name.assign (m2[2].first, m2[2].second - m2[2].first); filename.assign (m2[3].first, m2[3].second - m2[3].first); - } else if (regex_search (b, x, m2, re_disp2, boost::regex_constants::match_single_line)) { +// } else if 
(regex_search (b, x, m2, re_disp2, boost::regex_constants::match_single_line)) { + } else if (regex_search (sp.b, sp.t, m2, re_disp2, boost::regex_constants::match_single_line)) { disp.assign (m2[1].first, m2[1].second - m2[1].first); name.assign (m2[2].first, m2[2].second - m2[2].first); - } else if (regex_search (b, x, m2, re_type, boost::regex_constants::match_single_line)) { +// } else if (regex_search (b, x, m2, re_type, boost::regex_constants::match_single_line)) { + } else if (regex_search (sp.b, sp.t, m2, re_type, boost::regex_constants::match_single_line)) { type.assign (m2[1].first, m2[1].second - m2[1].first); } else { #ifdef DEBUG2 - std::cerr << "not match:" << ustring (b, x) << "\n"; +// std::cerr << "not match:" << ustring (b, x) << "\n"; + std::cerr << "not match:" << sp.pre () << "\n"; #endif /* DEBUG */ } - b = m[0].second; - } -} - -#if 0 -bool CGIFormFile::readFilename (int i, ustring& filename) { - if (0 <= i && i < parts.size ()) { - filename = filenames[i]; - return true; +// b = m[0].second; } - return false; } -#endif bool CGIFormFile::saveFile (int i, const ustring& path, size_t max) { static size_t bsize = 65536; diff --git a/lib/formfile.h b/lib/formfile.h index 48f3bc7..2e9f58b 100644 --- a/lib/formfile.h +++ b/lib/formfile.h @@ -11,6 +11,7 @@ class CGIFormFile: public CGIForm { public: typedef std::pair part; typedef boost::unordered_map sary_t; + typedef boost::unordered_map tary_t; ustring tmpfile; uregex re1; @@ -19,6 +20,7 @@ class CGIFormFile: public CGIForm { char* mapdata; size_t mapsize; sary_t datamap; + tary_t typemap; std::vector parts; CGIFormFile () { @@ -29,6 +31,7 @@ class CGIFormFile: public CGIForm { }; virtual int partAt (int i); + virtual ustring typeAt (int i); virtual void read_multipart (MotorEnv* env); virtual bool saveData (MotorEnv* env); virtual void unlinkTmpFile (); diff --git a/lib/http.cc b/lib/http.cc index 08d210f..d380d3d 100644 --- a/lib/http.cc +++ b/lib/http.cc @@ -46,8 +46,6 @@ void 
HTTPResponse::setCookie (const ustring& key, const ustring& val, const ust ustring ck; ustring u; size_t len; - umatch m; - static uregex re ("//|/\\.|\\.\\.|[\\x00-\\x20\\x7f-\\xff]"); if (key.size () <= 128 && val.size () <= 512) { ck = cookieencode (key); @@ -67,7 +65,7 @@ void HTTPResponse::setCookie (const ustring& key, const ustring& val, const ust ck.append (CharConst ("; expires=")); ck.append (dateCookie (limit)); } - if (domain.size () > 0 && checkDomain_dot (domain)) { // ??? + if (domain.size () > 0 && matchDomain_dot (domain)) { // ??? ck.append (CharConst ("; domain=")); ck.append (domain); } @@ -79,7 +77,7 @@ void HTTPResponse::setCookie (const ustring& key, const ustring& val, const ust } } -void HTTPResponse::setCookiePair (uiterator& b, const uiterator& e) { +void HTTPResponse::setCookiePair (uiterator b, const uiterator e) { uiterator m; ustring key, val; @@ -97,21 +95,13 @@ void HTTPResponse::setCookiePair (uiterator& b, const uiterator& e) { } void HTTPResponse::parseCookie () { - uiterator b, e; - umatch m; - static uregex re (" *; *"); - cookieDone = true; cookie = getenvString (kHTTP_COOKIE); - b = cookie.begin (); - e = cookie.end (); - while (usearch (b, e, m, re)) { - if (b != m[0].first) - setCookiePair (b, m[0].first); - b = m[0].second; + SplitterFn sp (cookie, findSepColon); + while (sp.next ()) { + if (sp.preSize () > 0) + setCookiePair (sp.begin (), sp.end ()); } - if (b != e) - setCookiePair (b, e); } ustring HTTPResponse::readCookie (const ustring& key) { @@ -193,22 +183,19 @@ void HTTPResponse::standardResponse_html (MotorOutput* out, MotorEnv* env) { void HTTPResponse::disposition (MotorOutput* out, bool finline, const ustring& name) { ustring n2; - Splitter sp (name, re_q); - - if (sp.next ()) { - if (sp.match (0)) { - n2.reserve (name.length ()); - n2.append (sp.begin (), sp.end ()); + SplitterCh sp (name, '\"'); + + if (sp.nextSep ()) { + n2.reserve (name.length ()); + n2.append (sp.pre ()); + n2.append (uUScore); + while 
(sp.nextSep ()) { + n2.append (sp.pre ()); n2.append (uUScore); - while (sp.next ()) { - n2.append (sp.begin (), sp.end ()); - if (sp.match (0)) { - n2.append (uUScore); - } - } - } else { - n2 = name; } + n2.append (sp.pre ()); + } else { + n2 = name; } if (finline) { out->out_raw (CharConst (kRES_DISP ": " kINLINE)); @@ -291,12 +278,20 @@ void HTTPResponse::forbiddenResponse (MotorOutput* out, MotorEnv* env) { } void HTTPResponse::setHeader (const ustring& key, const ustring& val) { - umatch m; - static uregex re ("^[a-zA-Z_0-9-]+$"); - - if (!usearch (key, m, re)) + static char table_httpheader[] = { // [a-zA-Z_0-9-] + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + }; + + if (! matchWordTbl (key.begin (), key.end (), table_httpheader)) throw (key + ": bad header name."); - if (!checkASCII (val)) + if (! 
matchASCII (val.begin (), val.end ())) throw (val + ": bad header value."); moreheader.push_back (std::pair (key, val)); } diff --git a/lib/http.h b/lib/http.h index 4695910..c4cf8c7 100644 --- a/lib/http.h +++ b/lib/http.h @@ -33,7 +33,7 @@ class HTTPResponse { virtual void printNoCache (MotorOutput* out); virtual void printCookie (MotorOutput* out, MotorEnv* env); virtual void setCookie (const ustring& key, const ustring& val, const ustring& path, time_t span, time_t limit, const ustring& domain, bool fsecure, MotorEnv* env); - virtual void setCookiePair (uiterator& b, const uiterator& e); + virtual void setCookiePair (uiterator b, const uiterator e); virtual void parseCookie (); virtual ustring readCookie (const ustring& key); virtual void setRandomCookie (MotorEnv* env); diff --git a/lib/ml.cc b/lib/ml.cc index 69741a2..16d44b9 100644 --- a/lib/ml.cc +++ b/lib/ml.cc @@ -11,6 +11,126 @@ #include #include #include +#include + +static bool findSymSp (uiterator& b, uiterator e) { /* Advance b to the first backslash or control character (0x00-0x1f) in [b, e); returns true if one was found. */ + int c; + for (; b < e; ++ b) { + c = *b; + if (c == '\\' || (0 <= c && c < ' ')) /* the lower bound keeps bytes >= 0x80, which are negative where char is signed, from being treated as control characters; the space itself is not included */ + return true; + } + return false; +} + +static bool matchSymOct (uiterator& b, uiterator e) { /* Match exactly three octal digits at b; b is advanced past them only on success. */ + int c; + int n = 0; + uiterator p = b; + for (; p < e && n < 3; ++ p, ++ n) { + c = *p; + if ('0' <= c && c <= '7') { + } else { + return false; + } + } + if (n == 3) b = p; /* fewer than three digits before e is not a match */ + return n == 3; +} + +static bool matchRealNum (uiterator&b, uiterator e) { + uiterator p = b; + int c; + bool f = false; + if (p < e) { + c = *p; + if (c == '+' || c == '-') + ++ p; + while (1) { + if (p == e) { + goto Ep1; + } else if (isdigit ((c = *p))) { + ++ p; + f = true; + } else if (c == '.') { + ++ p; + break; + } else { + goto Ep1; + } + } + while (1) { + if (p == e) { + goto Ep1; + } else if (isdigit ((c = *p))) { + ++ p; + f = true; + } else if (c == 'e' || c == 'E') { + ++ p; + break; + } else { + goto Ep1; + } + } + if (p == e) + return false; + c = *p; + if (c == '+' || c == '-') + ++ p; + if (p == e) + return false; + while (1) { + if (p 
== e) { + goto Ep1; + } else if (isdigit ((c = *p))) { + ++ p; + } else { + break; + } + } + } + Ep1: + if (f) { + b = p; + return true; + } else { + return false; + } +} + +static bool matchSymbol_c (int c) { + static char table_symbol[] = { // x00-x20"'();[]{} + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, + }; + return c < 0 || 128 <= c || table_symbol[c]; +} +inline bool matchSymbol (uiterator&b, uiterator e) { + return matchHeadFn (b, e, matchSymbol_c); +} + +static bool findNonSymbol_c (int c) { // '\'を含める + static char table_nonsymbol[] = { // x00-x20"'();[]{}\\ ; + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, + }; + return 0 <= c && c < 128 && table_nonsymbol[c]; +} +inline bool findNonSymbol (uiterator&b, uiterator e) { // '\'を含める + return findCharFn (b, e, findNonSymbol_c); +} void MNode::fdelete () { #ifdef DEBUG3 @@ -185,15 +305,19 @@ ustring MNode::sym_to_string () { assert (type == MC_SYM); uiterator b = sym->begin (); uiterator e = sym->end (); - umatch m; - if (b < e && usearch (b, e, m, re_nonsymbolchar)) { + uiterator p; + p = b; + if (findNonSymbol (b, e)) { ustring ans; do { - ans.append (b, m[0].first).append (1, '\\').append (octchar (*m[0].first)); - b = m[0].second; - } while (b < e && usearch (b, e, m, re_nonsymbolchar)); - if (b < e) - ans.append 
(b, e); + if (p < b) + ans.append (p, b); + ans.append (1, '\\').append (octchar (*b)); + ++ b; + p = b; + } while (b < e && findNonSymbol (b, e)); + if (p < e) + ans.append (p, e); return ans; } else { return *sym; @@ -340,33 +464,36 @@ MNode* quoted (MNode* v) { } MNode* newMNode_sym (ustring* v) { - static uregex re_special ("[\\x00-\x1f\\\\]"); - static uregex re_oct ("^[0-7][0-7][0-7]"); MNode* ans = new MNode; uiterator b = v->begin (); uiterator e = v->end (); - umatch m; + uiterator p; + int c; - if (b < e && usearch (b, e, m, re_special)) { + p = b; + if (findSymSp (b, e)) { ustring* w = new ustring; do { - w->append (b, m[0].first); - b = m[0].second; - if (*(m[0].first) == '\\') { - if (b < e && usearch (b, e, m, re_oct)) { - int c = octchar (b); + if (p < b) + w->append (p, b); + c = *b; + ++ b; + if (c == '\\') { + p = b; + if (matchSymOct (b, e)) { + c = octchar (p); if (32 <= c && c < 127) w->append (1, c); - b = m[0].second; } else { w->append (1, '\\'); } } else { // skip; } - } while (b < e && usearch (b, e, m, re_special)); - if (b < e) - w->append (b, e); + p = b; + } while (findSymSp (b, e)); + if (p < e) + w->append (p, e); ans->set_sym (w); delete v; } else { @@ -581,7 +708,7 @@ ustring formatString (const ustring& format, boost::ptr_vector& par) umatch m; u_int i; MNode* a; - static uregex re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}"); + uregex re ("\\$\\{([1-9][0-9]*)(:([a-zA-Z][a-zA-Z0-9]*)(:([0-9a-z.:]+))?)?\\}"); static struct { const char* name; size_t namelen; @@ -618,7 +745,7 @@ ustring formatString (const ustring& format, boost::ptr_vector& par) std::vector fpar; int i; if (m[4].matched) - split (m[5].first, m[5].second, re_colon, fpar); + split (m[5].first, m[5].second, ':', fpar); for (i = 0; formatFunc[i].name; i ++) { if (match (m[3].first, m[3].second, formatFunc[i].name, formatFunc[i].namelen)) { (*formatFunc[i].fn) (ans, a, fpar); @@ -801,7 +928,7 @@ void MotorTexp::scan (const ustring& text, bool 
skip) { } while (b != e) { if (skip) { - skipWhite (linenum, b, e); + skipBlank (linenum, b, e); if (*b == '<') { #ifdef DEBUG // std::cerr << "---:" << ustring (b, b + 20) << "\n"; @@ -819,22 +946,18 @@ void MotorTexp::scan (const ustring& text, bool skip) { } void MotorTexp::skipHead (uiterator& b, uiterator& e, int& linenum) { - umatch m; - while (b != e && (*b != '(' && *b != ';')) { - if (usearch (b, e, m, re_nl)) { - b = m[0].second; + while (b < e && (*b != '(' && *b != ';')) { + if (findNLb (b, e)) { linenum ++; - } else { - b = e; } } } void MotorTexp::scanWord (int& linenum, uiterator& b, uiterator& e, word_type& type, ustring*& ans) { ustring::value_type c; - umatch m; + uiterator p; Ep1:; - skipWhite (linenum, b, e); + skipBlank (linenum, b, e); if (b != e) { c = *b; switch (c) { @@ -858,13 +981,11 @@ void MotorTexp::scanWord (int& linenum, uiterator& b, uiterator& e, word_type& break; case ';': b ++; - if (usearch (b, e, m, re_nl)) { - b = m[0].second; + if (findNLb (b, e)) { linenum ++; goto Ep1; } else { // end of comment but without nl. - b = e; type = YYNONE; } break; @@ -885,22 +1006,20 @@ void MotorTexp::scanWord (int& linenum, uiterator& b, uiterator& e, word_type& type = YYREN3; break; default: - if (usearch (b, e, m, re_realnumber)) { + p = b; + if (matchRealNum (b, e)) { type = YYNUM; - ans = new ustring (m[0]); - b = m[0].second; - } else if (usearch (b, e, m, re_symbol)) { - assert (b == m[0].first); - if (*b == '.' && m[0].second - m[0].first == 1) { + ans = new ustring (p, b); + } else if (matchSymbol (b, e)) { + if (*p == '.' 
&& b - p == 1) { type = YYPERIOD; - b = m[0].second; } else { type = YYSIM; - ans = new ustring (m[0]); - b = m[0].second; + ans = new ustring (p, b); } - skipWhite (linenum, b, e); + skipBlank (linenum, b, e); } else { +// std::cerr << "error\n"; assert (0); } } @@ -911,11 +1030,9 @@ void MotorTexp::scanWord (int& linenum, uiterator& b, uiterator& e, word_type& ustring* MotorTexp::scanText (int& linenum, uiterator& b, uiterator& e) { ustring::value_type c; - uiterator p = b; + uiterator p; ustring* ans = new ustring; - umatch m; uint32_t v; - static uregex re_hex4 ("^[0-9a-fA-F]{4}"); ans->reserve (128); while (b != e) { @@ -945,14 +1062,12 @@ ustring* MotorTexp::scanText (int& linenum, uiterator& b, uiterator& e) { linenum ++; break; default: - if (e - b >= 4 && usearch (b, e, m, re_hex4)) { - v = hextoul (m[0].first, m[0].second); - if (v >= 32) { + p = b; + if (e - b >= 4 && matchHex4 (b, e)) { + v = hextoul (p, b); + if (v == '\t' || v == '\n' || v == '\r' || v >= 32) { std::wstring w (1, v); ans->append (wtou (w)); - b += 4; - } else { - goto bp1; } break; } @@ -973,14 +1088,22 @@ ustring* MotorTexp::scanText (int& linenum, uiterator& b, uiterator& e) { return ans; } -void MotorTexp::skipWhite (int& linenum, uiterator& b, uiterator& e) { +void MotorTexp::skipBlank (int& linenum, uiterator& b, uiterator& e) { ustring::value_type c; - for (; b != e; b ++) { // hack + while (b < e) { // hack c = *b; if (0 <= c && c <= ' ') { - if (c == '\n') { - linenum ++; + if (c == '\r') { + ++ linenum; + ++ b; + if (b < e && *b == '\n') + ++ b; + } else if (c == '\n') { + ++ linenum; + ++ b; + } else { + ++ b; } } else { break; @@ -1008,6 +1131,8 @@ void MotorTexp::scanQuote (int& linenum, uiterator& b, uiterator& e, MNode* cel if (b != e) { if (scanTexp (linenum, b, e, cell, false, YYNONE)) throw (uErrorSyntax); // ') + } else { + throw (uErrorSyntax); } } @@ -1179,6 +1304,23 @@ bool MotorTexp::scanTexp (int& linenum, uiterator& b, uiterator& e, MNode*& cel return false; } 
+bool MotorTexp::matchHex4 (uiterator& b, uiterator e) { /* Match exactly four hexadecimal digits at b; b is advanced past them only on success. */ + uiterator p; + int n = 0; + int c; + for (p = b; p < e && n < 4; ++ p, ++ n) { + c = *p; /* test the character under the scan cursor p, not the first character *b */ + if (('0' <= c && c <= '9') + || ('a' <= c && c <= 'f') + || ('A' <= c && c <= 'F')) { + } else { + return false; + } + } + if (n == 4) b = p; /* fewer than four digits before e is not a match */ + return n == 4; +} + void nextNode (MNode*& arg) { if (arg) { switch (arg->type) { diff --git a/lib/ml.h b/lib/ml.h index 86ed5a4..e7651b6 100644 --- a/lib/ml.h +++ b/lib/ml.h @@ -459,12 +459,13 @@ class MotorTexp { virtual void skipHead (uiterator& b, uiterator& e, int& linenum); virtual void scanWord (int& linenum, uiterator& b, uiterator& e, word_type& type, ustring*& ans); virtual ustring* scanText (int& linenum, uiterator& b, uiterator& e); - virtual void skipWhite (int& linenum, uiterator& b, uiterator& e); + virtual void skipBlank (int& linenum, uiterator& b, uiterator& e); virtual bool scanCar (int& linenum, uiterator& b, uiterator& e, MNode* cell); virtual void scanQuote (int& linenum, uiterator& b, uiterator& e, MNode* cell); virtual void scanVector (int& linenum, uiterator& b, uiterator& e, MNode* cell); virtual void scanTable (int& linenum, uiterator& b, uiterator& e, MNode* cell); virtual bool scanTexp (int& linenum, uiterator& b, uiterator& e, MNode*& cell, bool qcdr, word_type closing); + virtual bool matchHex4 (uiterator& b, uiterator e); }; class MLFunc { diff --git a/lib/motor.cc b/lib/motor.cc index 69ee2d4..d3467d2 100644 --- a/lib/motor.cc +++ b/lib/motor.cc @@ -4,6 +4,7 @@ #include "mlenv.h" #include "expr.h" #include "util_const.h" +#include "util_check.h" #include "util_file.h" #include "util_splitter.h" #include "util_string.h" @@ -303,13 +304,8 @@ void HTMLMotor::compile (const ustring& text, bool skipHead) { begin = text.begin (); end = text.end (); if (skipHead) { - umatch m; while (begin != end && *begin != '<') { - if (usearch (begin, end, m, re_nl)) { - begin = m[0].second; - } else { - begin = end; - } + findNLb (begin, end); } } s1 (&objs, term_none, NULL); 
@@ -554,7 +550,7 @@ int HTMLMotor::s3 (MotorObj::MotorObjVec* sobjs, uiterator start, bool fbang, u int HTMLMotor::s4 (MotorObj::MotorObjVec* sobjs, uiterator start, upair name) { // [[NAME: // start points after the colon. - umatch m; +// umatch m; #ifdef DEBUG2 cerr << "s4 () start:" << ustring (start, start + 8) << "... name:" << ustring (name.first, name.second) << "\n"; @@ -563,7 +559,7 @@ int HTMLMotor::s4 (MotorObj::MotorObjVec* sobjs, uiterator start, upair name) { if (! s5 (sobjs, start)) { return 0; // NG } - } else if (usearch (name.first, name.second, m, re_digits)) {// [[NUM: + } else if (matchNum (name.first, name.second)) {// [[NUM: AutoDelete o; uiterator b = begin; @@ -574,10 +570,8 @@ int HTMLMotor::s4 (MotorObj::MotorObjVec* sobjs, uiterator start, upair name) { return 0; // NG } sobjs->push_back (o.release ()); - } else { // [[FUNC: - if (! s8 (sobjs, name, start)) { - return 0; - } + } else if (! s8 (sobjs, name, start)) { // [[FUNC: + return 0; } return 1; } @@ -646,8 +640,6 @@ int HTMLMotor::s7 (MotorObj::MotorObjVec* sobjs) { int HTMLMotor::s8 (MotorObj::MotorObjVec* sobjs, upair name, uiterator start) { // [[FUNC: ustring s; -// static uregex re_colon_bra ("(:)|(\\]\\]" cECOM ")|&(.);"); -// Splitter sp (start, end, re_colon_bra); MFTable::iterator it; MFTable2::iterator it2; @@ -675,44 +667,11 @@ int HTMLMotor::s8 (MotorObj::MotorObjVec* sobjs, upair name, uiterator start) { sobjs->push_back (o.release ()); } return 1; // OK - -#if 0 -#ifdef DEBUG2 - std::cerr << "s8 start:" << *start << "\n"; -#endif /* DEBUG */ - while (sp.next ()) { - s.append (sp.begin (), sp.end ()); - if (sp.match (1)) { // : - o ()->args.push_back (s); - s.resize (0); - } else if (sp.match (2)) { // ]] - o ()->args.push_back (s); - s.resize (0); - begin = sp.matchEnd (); - goto Ex1; - } else if (sp.match (3)) { // &x; - s.append (sp.matchBegin (3), sp.matchEnd (3)); - } else if (! 
sp.match (0)) { - return 0; // NG - } else { - assert (0); - } - } -#ifdef DEBUG2 - std::cerr << "unexpected end\n"; -#endif /* DEBUG */ - return 0; // NG - - Ex1:; - sobjs->push_back (o.release ()); - - return 1; -#endif } int HTMLMotor::s9 (std::vector& args, uiterator start) { static uregex re_colon_bra ("(:)|(\\]\\]" cECOM ")|&(.);"); - Splitter sp (start, end, re_colon_bra); + SplitterRe sp (start, end, re_colon_bra); ustring s; while (sp.next ()) { @@ -738,7 +697,7 @@ int HTMLMotor::s9 (std::vector& args, uiterator start) { int HTMLMotor::s10 (std::vector& args, MotorObj::MotorObjVec& arg2, uiterator start) { static uregex re_colon_spc_bra ("(:)|( )|(\\]\\]" cECOM ")|&(.);"); - Splitter sp (start, end, re_colon_spc_bra); + SplitterRe sp (start, end, re_colon_spc_bra); ustring s; while (sp.next ()) { diff --git a/lib/motorenv.cc b/lib/motorenv.cc index c80a4ec..e8d827e 100644 --- a/lib/motorenv.cc +++ b/lib/motorenv.cc @@ -113,7 +113,7 @@ bool MotorEnv::path_resource (const ustring& name, ustring& ans) { if (top.length () > 0) { r = top + name; shapePath (r); - if (matchHead (r, top) && checkAbsoluteResourceName (r) && isPlainFile (r)) { + if (matchHead (r, top) && matchAbsoluteResourceName (r) && isPlainFile (r)) { ans = r; return true; } else { @@ -143,7 +143,7 @@ bool MotorEnv::path_resource (const ustring& name, ustring& ans) { #ifdef DEBUG // std::cerr << "r:" << r << "\n"; #endif /* DEBUG */ - if (matchHead (r, top) && checkAbsoluteResourceName (r) && isPlainFile (r)) { + if (matchHead (r, top) && matchAbsoluteResourceName (r) && isPlainFile (r)) { ans = r; return true; #ifdef STANDALONE @@ -156,7 +156,7 @@ bool MotorEnv::path_resource (const ustring& name, ustring& ans) { #ifdef STANDALONE r = name; shapePath (r); - if (checkResourceName (r) && isPlainFile (r)) { + if (matchResourceName (r) && isPlainFile (r)) { ans = r; return true; } @@ -172,7 +172,7 @@ bool MotorEnv::path_resource (const ustring& name, ustring& ans) { if (documentRoot.length () > 0 
&& r.length () > 0) { r = documentRoot + uSlash + r + uSlash + name; shapePath (r); - if (matchHead (r, top) && checkAbsoluteResourceName (r) && isPlainFile (r)) { + if (matchHead (r, top) && matchAbsoluteResourceName (r) && isPlainFile (r)) { ans = r; return true; } @@ -262,7 +262,7 @@ ustring MotorEnv::path_storage_file (const ustring& name, const char* suffix) { ustring MotorEnv::path_static_file (const ustring& name) { ustring ans; - if (! checkResourceName (name)) + if (! matchResourceName (name)) throw (name + uErrorBadFile); if (path_resource (name, ans)) { } else { @@ -335,7 +335,7 @@ void MotorEnv::setDatastore (const ustring& name) { if (name.size () == 0) { datastore = appenv->datastore; } else { - if (! checkName (name)) + if (! matchName (name)) throw (name + uErrorBadDatastore); datastore = name; } diff --git a/lib/motoroutput.h b/lib/motoroutput.h index 4a2b2a6..da081ef 100644 --- a/lib/motoroutput.h +++ b/lib/motoroutput.h @@ -2,6 +2,7 @@ #define MOTOROUTPUT_H #include "util_const.h" +#include "util_regex.h" #include "ustring.h" class MotorOutput { diff --git a/lib/ustring.h b/lib/ustring.h index ea9fd30..1d2d2d6 100644 --- a/lib/ustring.h +++ b/lib/ustring.h @@ -3,7 +3,7 @@ #include #include -#include +#include inline char* char_type (u_char* v) {return (char*)v;} inline char* char_type (char* v) {return v;} @@ -18,15 +18,6 @@ inline char* noconst_char (const char* v) {return (char*)v;} typedef std::basic_string ustring; typedef ustring::const_iterator uiterator; typedef std::pair upair; -typedef boost::match_results umatch; -typedef boost::basic_regex > uregex; - -inline bool usearch (ustring::const_iterator first, ustring::const_iterator last, umatch& m, const uregex& re, boost::match_flag_type flags = boost::regex_constants::match_single_line) { - return regex_search (first, last, m, re, flags); -} -inline bool usearch (const ustring& s, umatch& m, const uregex& re, boost::match_flag_type flags = boost::regex_constants::match_single_line) { - 
return regex_search (s.begin (), s.end (), m, re, flags); -} inline int match (upair& p, const u_char* s, ustring::size_type len) { ustring::size_type n = p.second - p.first; diff --git a/lib/utf8.cc b/lib/utf8.cc index 12a0b6a..3ba18b0 100644 --- a/lib/utf8.cc +++ b/lib/utf8.cc @@ -1,5 +1,6 @@ #include "utf8.h" #include "util_const.h" +#include "util_splitter.h" #include "ustring.h" #include "cdbobj.h" #include @@ -271,28 +272,38 @@ ustring ellipsis (const ustring& text, int limit) { return u; } -ustring logText (const ustring& text) { - uiterator b = text.begin (); - uiterator e = text.end (); - umatch m; - ustring u; - static uregex re ("[\\000-\\037\\0177]"); - - u.reserve (256); - while (usearch (b, e, m, re)) { - if (b != m[0].first) - u += ustring (b, m[0].first); - if (*m[0].first == '\n') { - u += "//"; - } else { - u += '_'; +static bool findCtrlChar (uiterator& b, uiterator e, uiterator& u) { + int c; + for (; b < e; ++ b) { + c = *b; + if ((0 <= c && c < 0x20) || c == 0x7f) { // [\x00-\x1f\x7f] + u = b + 1; + return true; } - b = m[0].second; } - if (b != e) { - u += ustring (b, e); + u = e; + return false; +} + +ustring logText (const ustring& text) { + SplitterFn sp (text, findCtrlChar); + if (sp.nextSep ()) { + ustring ans; + do { + if (sp.preSize () > 0) + ans.append (sp.pre ()); + if (*sp.end () == '\n') { + ans.append (CharConst ("//")); + } else { + ans.append (uUScore); + } + } while (sp.nextSep ()); + if (sp.preSize () > 0) + ans.append (sp.pre ()); + return ans; + } else { + return text; } - return u; } void clipEnd (ustring& val, uregex& re1, uregex& re2) { @@ -317,15 +328,15 @@ void clipEnd (ustring& val, uregex& re1, uregex& re2) { } void clipWhiteEnd (ustring& val) { - static uregex re1 ("^(" UTF8_SPACE "|" UTF8_ZWSPACE "|" UTF8_IDEOSPACE ")+"); - static uregex re2 ("(" UTF8_SPACE "|" UTF8_ZWSPACE "|" UTF8_IDEOSPACE ")+$"); + uregex re1 ("^(" UTF8_SPACE "|" UTF8_ZWSPACE "|" UTF8_IDEOSPACE ")+"); + uregex re2 ("(" UTF8_SPACE "|" 
UTF8_ZWSPACE "|" UTF8_IDEOSPACE ")+$"); clipEnd (val, re1, re2); } void clipNLEnd (ustring& val) { - static uregex re1 ("^\\n+"); - static uregex re2 ("\\n+$"); + uregex re1 ("^\\n+"); + uregex re2 ("\\n+$"); clipEnd (val, re1, re2); } diff --git a/lib/utf8.h b/lib/utf8.h index 828edd8..ff9a106 100644 --- a/lib/utf8.h +++ b/lib/utf8.h @@ -2,6 +2,7 @@ #define UTF8_H #include "ustring.h" +#include "util_regex.h" #define UTF8_SPACE "\x20" #define UTF8_NBSPACE "\xc2\xa0" diff --git a/lib/util_apache.cc b/lib/util_apache.cc index 34ac371..df338b6 100644 --- a/lib/util_apache.cc +++ b/lib/util_apache.cc @@ -11,7 +11,7 @@ ustring apacheAbsolutePath (const ustring& url) { ustring ans; std::vector ary; std::vector::iterator it; - Splitter sp (url.begin (), url.end (), re_slash); + SplitterCh sp (url, '/'); uiterator b, e; bool fdirpath; size_t len = url.length (); @@ -24,10 +24,6 @@ ustring apacheAbsolutePath (const ustring& url) { if (isAbsolutePath (url)) { sp.next (); } else { -/* ustring e = getenvString (kSCRIPT_NAME); - ustring p = getenvString (kPATH_INFO); - if (p.length () > 0) e.append (p); -*/ // mod_rewriteで書き換えた時、元のURLを返す ustring e = getenvString (kREQUEST_URI); // REQUEST_URIはデコードされていない。 @@ -35,7 +31,7 @@ ustring apacheAbsolutePath (const ustring& url) { if (p != ustring::npos) e.resize (p); e = percentDecode (e); // ディレクトリパスをチェック前にデコードする。 - splitE (e.begin (), e.end (), re_slash, ary); + splitE (e.begin (), e.end (), '/', ary); if (ary.size () > 0 && ary.back ().length () > 0) { ary.pop_back(); } diff --git a/lib/util_check.cc b/lib/util_check.cc index a8b8b89..b8b5dd3 100644 --- a/lib/util_check.cc +++ b/lib/util_check.cc @@ -4,6 +4,7 @@ #include "httpconst.h" #include "motorconst.h" #include "ustring.h" +#include bool checkRe (const ustring& name, const uregex& re) { umatch m; @@ -15,7 +16,7 @@ bool checkRe (const ustring& name, const uregex& re) { } } -bool checkRe (const uiterator& b, const uiterator& e, const uregex& re) { +bool checkRe (uiterator b, 
uiterator e, const uregex& re) { umatch m; if (usearch (b, e, m, re)) { @@ -25,56 +26,58 @@ bool checkRe (const uiterator& b, const uiterator& e, const uregex& re) { } } -bool checkName (const ustring& name) { +bool matchName (const ustring& name) { static uregex re ("^" kWNAME "{0,31}$"); return (checkRe (name, re)); } -bool checkFilename (const ustring& name) { +bool matchFilename (const ustring& name) { static uregex re ("^" kWNAME "{1,127}(\\." kWORD "{1,16})?$"); return (checkRe (name, re)); } -bool checkResourceName (const ustring& name) { +bool matchResourceName (const ustring& name) { // static uregex re ("^(" kWNAME "{0,127}/)*" kWNAME "{0,127}(\\." kWORD "{1,16})?$"); static uregex re ("^([a-zA-Z0-9_][a-zA-Z0-9_.-]{0,127}/)*[a-zA-Z0-9_][a-zA-Z0-9_.-]{0,127}(\\." kWORD "{1,16})?$"); return (checkRe (name, re)); } -bool checkAbsoluteResourceName (const ustring& name) { +bool matchAbsoluteResourceName (const ustring& name) { static uregex re ("^/(" kFName "/)*" kFName "$"); return (checkRe (name, re)); } -bool checkASCII (const ustring& name) { - static uregex re ("[^ -\\x7e]"); +static bool isPrintableAscii (int c) { + return 0x20 <= c && c <= 0x7e; +} - return (! 
checkRe (name, re)); +bool matchASCII (uiterator b, uiterator e) { // [ -\x7e] + return matchWordFn (b, e, isPrintableAscii); } -bool checkIP (const ustring& name) { +bool matchIP (const ustring& name) { static uregex re ("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$"); return (checkRe (name, re)); } -bool checkDomain_dot (const ustring& name) { +bool matchDomain_dot (const ustring& name) { static uregex re ("^\\.?([a-zA-Z0-9-]+\\.)*([a-zA-Z0-9-]+)$"); return (checkRe (name, re)); } -bool checkHostname (const ustring& name) { +bool matchHostname (const ustring& name) { static uregex re ("^[a-zA-Z0-9][a-zA-Z0-9\\-]*(\\.[a-zA-Z0-9][a-zA-Z0-9\\-]*)*$"); return (checkRe (name, re)); } -bool checkMimeType (const ustring& name) { +bool matchMimeType (const ustring& name) { static uregex re ("^[a-z_0-9-]+/[a-z_0-9.+*-]+$"); return (checkRe (name, re)); @@ -86,47 +89,43 @@ bool checkMailAddr (const ustring& name) { return (checkRe (name, re)); } -bool checkAlNum (const uiterator& b, const uiterator& e) { - static uregex re ("^[a-zA-Z_0-9]+$"); - - return (checkRe (b, e, re)); -} - -bool checkNum (const ustring& text) { - return (checkNum (text.begin (), text.end ())); -} - -bool checkNum (const uiterator& b, const uiterator& e) { - return (checkRe (b, e, re_digits)); +bool matchAlNum (uiterator b, uiterator e) { + static char table_alnum[] = { // [a-zA-Z_0-9] + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + }; + return matchWordTbl (b, e, table_alnum); +} + +bool matchNum (uiterator b, uiterator e) { + for (; b < e; ++ b) { + if (! 
isdigit (*b)) + return false; + } + return true; } -bool checkWidth (const uiterator& b, const uiterator& e) { +bool matchWidth (uiterator b, uiterator e) { static uregex re_width ("^[0-9]+(\\.[0-9]+)?(%|px|pt|in|mm|cm|em|ex)?$"); return (checkRe (b, e, re_width)); } -bool checkWidth (const ustring& text) { - return (checkWidth (text.begin (), text.end ())); -} - -bool checkColor (const uiterator& b, const uiterator& e) { +bool checkColor (uiterator b, uiterator e) { static uregex re_color ("^#([0-9a-fA-F]{3}){1,2}$"); return (checkRe (b, e, re_color)); } -bool checkColor (const ustring& text) { - return (checkColor (text.begin (), text.end ())); -} - -bool checkWikiID (const uiterator& b, const uiterator& e) { +bool matchWikiID (uiterator b, uiterator e) { static uregex re_wikiid ("^" rWikiID "$"); return (checkRe (b, e, re_wikiid)); } -bool checkWikiID (const ustring& text) { - return (checkWikiID (text.begin (), text.end ())); -} - bool checkAry (const ustring& name) { return (name.length () > 0 && name[0] == '@'); } diff --git a/lib/util_check.h b/lib/util_check.h index ece9385..3918cdb 100644 --- a/lib/util_check.h +++ b/lib/util_check.h @@ -2,6 +2,7 @@ #define UTIL_CHECK_H #include "ustring.h" +#include "util_regex.h" #define UA_Windows 0x00000001 #define UA_Mac 0x00000002 @@ -13,26 +14,34 @@ #define UA_NetFront 0x00001000 bool checkRe (const ustring& name, const uregex& re); -bool checkRe (const uiterator& b, const uiterator& e, const uregex& re); -bool checkName (const ustring& name); -bool checkFilename (const ustring& name); -bool checkResourceName (const ustring& name); -bool checkAbsoluteResourceName (const ustring& name); -bool checkASCII (const ustring& name); -bool checkIP (const ustring& name); -bool checkDomain_dot (const ustring& name); -bool checkHostname (const ustring& name); -bool checkMimeType (const ustring& name); +bool checkRe (uiterator b, uiterator e, const uregex& re); +bool matchName (const ustring& name); +bool matchFilename (const 
ustring& name); +bool matchResourceName (const ustring& name); +bool matchAbsoluteResourceName (const ustring& name); +bool matchASCII (uiterator b, uiterator e); +bool matchIP (const ustring& name); +bool matchDomain_dot (const ustring& name); +bool matchHostname (const ustring& name); +bool matchMimeType (const ustring& name); bool checkMailAddr (const ustring& name); -bool checkAlNum (const uiterator& b, const uiterator& e); -bool checkNum (const ustring& text); // [0-9]+ -bool checkNum (const uiterator& b, const uiterator& e); -bool checkWidth (const uiterator& b, const uiterator& e); -bool checkWidth (const ustring& text); -bool checkColor (uiterator& b, uiterator& e); -bool checkColor (const ustring& text); -bool checkWikiID (const uiterator& b, const uiterator& e); -bool checkWikiID (const ustring& text); +bool matchAlNum (uiterator b, uiterator e); +bool matchNum (uiterator b, uiterator e); // [0-9]+ +inline bool matchNum (const ustring& text) { + return matchNum (text.begin (), text.end ()); +} +bool matchWidth (uiterator b, uiterator e); +inline bool matchWidth (const ustring& text) { + return matchWidth (text.begin (), text.end ()); +} +bool checkColor (uiterator b, uiterator e); +inline bool checkColor (const ustring& text) { + return checkColor (text.begin (), text.end ()); +} +bool matchWikiID (uiterator b, uiterator e); +inline bool matchWikiID (const ustring& text) { + return matchWikiID (text.begin (), text.end ()); +} bool checkAry (const ustring& name); bool checkAry (const ustring& name, ustring& sym); bool isHTTPS (); diff --git a/lib/util_const.cc b/lib/util_const.cc index c116663..353a88c 100644 --- a/lib/util_const.cc +++ b/lib/util_const.cc @@ -83,20 +83,6 @@ const char* WStr[] = { "Thursday", "Friday", "Saturday" }; -uregex re_tab ("\t"); -uregex re_lf ("\\n"); -uregex re_nl ("\\r\\n?|\\n"); -uregex re_slash ("/"); -uregex re_colon (":"); -uregex re_comma (","); -//uregex re_amp ("&"); -//uregex re_eq ("="); -uregex re_digits ("^[0-9]+$"); 
-uregex re_realnumber ("^[+-]?[0-9]+(\\.[0-9]*)?([eE][+-]?[0-9]+)?"); -uregex re_symbol ("^[^\\x00- \"'();\\[\\]\\{\\}]+"); -uregex re_nonsymbolchar ("[\\x00- \"'();\\[\\]\\{\\}\\\\]"); // \を含める -uregex re_q ("\""); - MNode* mlTrue = NULL; static MNodePtr trueHolder; diff --git a/lib/util_const.h b/lib/util_const.h index 2ad1762..2d79912 100644 --- a/lib/util_const.h +++ b/lib/util_const.h @@ -72,19 +72,6 @@ extern ustring uHttps; extern ustring uDefault; extern ustring uAssoc; -extern uregex re_tab; -extern uregex re_lf; -extern uregex re_nl; -extern uregex re_slash; -extern uregex re_colon; -extern uregex re_comma; -//extern uregex re_amp; -//extern uregex re_eq; -extern uregex re_digits; -extern uregex re_realnumber; -extern uregex re_symbol; -extern uregex re_nonsymbolchar; -extern uregex re_q; extern MNode* mlTrue; #endif /* UTIL_CONST_H */ diff --git a/lib/util_file.cc b/lib/util_file.cc index 0b337e4..6c95440 100644 --- a/lib/util_file.cc +++ b/lib/util_file.cc @@ -2,6 +2,7 @@ #include "config.h" #include "ustring.h" #include "util_const.h" +#include "util_splitter.h" #include "filemacro.h" #include #include @@ -84,21 +85,10 @@ void writeFile (const ustring& filename, ustring& data) { top must end with slash. 
*/ void makeSubdir (ustring& top, const ustring& sub) { - uiterator b, e; - umatch m; + SplitterCh sp (sub, '/'); - b = sub.begin (); - e = sub.end (); - while (b != e && usearch (b, e, m, re_slash)) { - top.append (b, m[0].second); - mkdir (top.c_str (), 0777); -#ifdef DEBUG2 - std::cerr << "mkdir:" << top << "\n"; -#endif /* DEBUG */ - b = m[0].second; - } - if (b != e) { - top.append (b, e); + while (sp.next ()) { + top.append (sp.b, sp.u); mkdir (top.c_str (), 0777); #ifdef DEBUG2 std::cerr << "mkdir:" << top << "\n"; diff --git a/lib/util_random.cc b/lib/util_random.cc index 0c177fc..7c4db3b 100644 --- a/lib/util_random.cc +++ b/lib/util_random.cc @@ -6,6 +6,7 @@ #include #include #include +#include static int Inited = 0; static unsigned long Seed; diff --git a/lib/util_regex.h b/lib/util_regex.h new file mode 100644 index 0000000..31a747e --- /dev/null +++ b/lib/util_regex.h @@ -0,0 +1,17 @@ +#ifndef UTIL_REGEXP_H +#define UTIL_REGEXP_H + +#include "ustring.h" +#include + +typedef boost::match_results umatch; +typedef boost::basic_regex > uregex; + +inline bool usearch (ustring::const_iterator first, ustring::const_iterator last, umatch& m, const uregex& re, boost::match_flag_type flags = boost::regex_constants::match_single_line) { + return regex_search (first, last, m, re, flags); +} +inline bool usearch (const ustring& s, umatch& m, const uregex& re, boost::match_flag_type flags = boost::regex_constants::match_single_line) { + return regex_search (s.begin (), s.end (), m, re, flags); +} + +#endif /* UTIL_REGEXP_H */ diff --git a/lib/util_splitter.h b/lib/util_splitter.h index 59686b6..9cb6aaa 100644 --- a/lib/util_splitter.h +++ b/lib/util_splitter.h @@ -2,37 +2,76 @@ #define UTIL_SPLITTER_H #include "ustring.h" +#include "util_string.h" class Splitter { public: - uregex* re; - uiterator b; - uiterator t; - uiterator u; - uiterator e; - umatch m; + uiterator b; // 先頭 + uiterator t; // 区切り文字列先頭 + uiterator u; // 区切り文字列末尾 + uiterator e; // 末尾 - Splitter 
(const ustring& text, uregex& r) { - b = t = u = text.begin (); - e = text.end (); - re = &r; - }; - Splitter (uiterator pb, uiterator pe, uregex& r) { - b = t = u = pb; - e = pe; - re = &r; + Splitter (uiterator _begin, uiterator _end) { + b = t = u = _begin; + e = _end; }; virtual ~Splitter () {}; - virtual void init (uiterator pb, uiterator pe) { - b = t = u = pb; - e = pe; - }; + virtual bool isEnd () { return b == e; }; + virtual uiterator begin () { + return b; + }; + virtual uiterator end () { + return t; + }; + virtual ustring pre () { + return ustring (b, t); + }; + virtual size_t preSize () { + return t - b; + }; + virtual uiterator matchBegin () { + return t; + }; + virtual uiterator matchEnd () { + return u; + }; + virtual uiterator eol () { + return e; + }; + virtual void rewind (int i) { + int n = u - t; + if (n > i) { + u -= i; + } else { + u -= n; + } + }; + virtual void shiftCursor () { + b = u; + }; + virtual bool next () = 0; + virtual bool nextSep () = 0; +}; + +class SplitterRe: public Splitter { + public: + uregex* re; + umatch m; + + SplitterRe (const ustring& text, uregex& _re): Splitter (text.begin (), text.end ()) { + re = &_re; + }; + SplitterRe (uiterator _begin, uiterator _end, uregex& _re): Splitter (_begin, _end) { + re = &_re; + }; + virtual ~SplitterRe () {}; + virtual bool next () { b = u; - if (b != e) { + if (b < e) { if (usearch (b, e, m, *re)) { t = m[0].first; u = m[0].second; @@ -47,14 +86,13 @@ class Splitter { }; virtual bool nextSep () { b = u; - if (b != e) { + if (b < e) { if (usearch (b, e, m, *re)) { t = m[0].first; u = m[0].second; return true; } else { - t = e; - u = e; + t = u = e; return false; } } else { @@ -62,41 +100,21 @@ class Splitter { return false; } }; - virtual uiterator begin () { - return b; - }; - virtual uiterator end () { - return t; - }; - virtual ustring cur () { - return ustring (b, t); - }; virtual bool match (int index) { return (t != u && m[index].matched); } virtual uiterator matchBegin () { 
return t; }; - virtual uiterator matchBegin (int index) { - return m[index].first; - }; virtual uiterator matchEnd () { return u; }; + virtual uiterator matchBegin (int index) { + return m[index].first; + }; virtual uiterator matchEnd (int index) { return m[index].second; }; - virtual uiterator eol () { - return e; - }; - virtual void rewind (int i) { - int n = u - t; - if (n > i) { - u -= i; - } else { - u -= n; - } - }; virtual bool nextSearch () { if (u != e) { if (usearch (u, e, m, *re)) { @@ -114,8 +132,147 @@ class Splitter { return false; } }; - virtual void shiftCursor () { - b = u; +}; + +class SplitterCh: public Splitter { + public: + int ch; + + SplitterCh (uiterator _begin, uiterator _end, int _ch): Splitter (_begin, _end) { + ch = _ch; + }; + SplitterCh (const ustring& text, int _ch): Splitter (text.begin (), text.end ()) { + ch = _ch; + }; + virtual ~SplitterCh () {}; + + virtual bool next () { + b = t = u; + if (b < e) { + if (findChar (t, e, ch)) { + u = t + 1; + } else { + u = e; + } + return true; + } else { + return false; + } + }; + virtual bool nextSep () { + b = t = u; + if (b < e) { + if (findChar (t, e, ch)) { + u = t + 1; + return true; + } else { + u = e; + return false; + } + } else { + t = u = e; + return false; + } + }; +}; + +class SplitterChars: public Splitter { + public: + ustring pattern; + + SplitterChars (uiterator _begin, uiterator _end, const ustring& _pat): Splitter (_begin, _end) { + pattern = _pat; + }; + SplitterChars (const ustring& text, const ustring& _pat): Splitter (text.begin (), text.end ()) { + pattern = _pat; + }; + virtual ~SplitterChars () {}; + + virtual bool next () { + b = t = u; + if (b < e) { + if (findChars (t, e, pattern)) { + u = t + 1; + } else { + u = e; + } + return true; + } else { + return false; + } + }; + virtual bool nextSep () { + b = t = u; + if (b < e) { + if (findChars (t, e, pattern)) { + u = t + 1; + return true; + } else { + u = e; + return false; + } + } else { + t = u = e; + return false; 
+ } + }; +}; + +class SplitterFn: public Splitter { + public: + bool (*fn) (uiterator&, uiterator, uiterator&); + + SplitterFn (uiterator _begin, uiterator _end, bool (*_fn)(uiterator&, uiterator, uiterator&)): Splitter (_begin, _end) { + fn = _fn; + }; + SplitterFn (const ustring& text, bool (*_fn)(uiterator&, uiterator, uiterator&)): Splitter (text.begin (), text.end ()) { + fn = _fn; + }; + virtual ~SplitterFn () {}; + + virtual bool next () { + b = t = u; + if (b < e) { + fn (t, e, u); + return true; + } else { + return false; + } + }; + virtual bool nextSep () { + b = t = u; + if (b < e) { + return fn (t, e, u); + } else { + t = u = e; + return false; + } + }; +}; + +class SplitterNL: public Splitter { + public: + SplitterNL (uiterator _begin, uiterator _end): Splitter (_begin, _end) {}; + SplitterNL (const ustring& text): Splitter (text.begin (), text.end ()) {}; + virtual ~SplitterNL () {}; + + virtual bool next () { + b = t = u; + if (b < e) { + findNL (t, e, u); + return true; + } else { + return false; + } + }; + virtual bool nextSep () { + b = t = u; + if (b < e) { + return findNL (t, e, u); + } else { + t = u = e; + return false; + } }; }; diff --git a/lib/util_string.cc b/lib/util_string.cc index 948bd2c..cebdd9b 100644 --- a/lib/util_string.cc +++ b/lib/util_string.cc @@ -62,11 +62,13 @@ ustring UIConv::cv (const ustring& text) { return ans; } +static bool isDigit (int c) { + return '0' <= c && c <= '9'; +} + ustring c3 (const ustring& str) { bool qsign = false; - static uregex re ("^[0-9]+"); - uiterator b, e; - umatch m; + uiterator b, e, t; b = str.begin (); e = str.end (); @@ -74,16 +76,16 @@ ustring c3 (const ustring& str) { qsign = true; b = b + 1; } - if (usearch (b, e, m, re)) { - int n = m[0].second - m[0].first; + t = b; + if (matchHeadFn (t, e, isDigit)) { + int n = t - b; int l = str.size () + n / 3; ustring ans; - ans.reserve (l); if (qsign) { ans.append (1, str[0]); } - for (; b != m[0].second; b ++) { + for (; b < t; ++ b) { ans.append 
(1, *b); if (n > 1 && n % 3 == 1) { ans.append (CharConst (",")); @@ -184,124 +186,160 @@ ustring urldecode_nonul (const ustring& str) { return ans; } -ustring omitPattern (const ustring& text, uregex& re) { - Splitter sp (text, re); - - if (sp.next ()) { - if (sp.match (0)) { - ustring ans; - ans.reserve (text.length ()); - if (sp.begin () != sp.end ()) - ans.append (sp.begin (), sp.end ()); - while (sp.next ()) { - if (sp.begin () != sp.end ()) - ans.append (sp.begin (), sp.end ()); - } - return ans; - } else { - return text; - } - } else { +static ustring omitPattern (const ustring& text, int (*fn)(int)) { + uiterator b = text.begin (); + uiterator e = text.end (); + uiterator p = b; + for (; p < e; ++ p) { + if (fn (*p)) + break; + } + if (p == e) { return text; + } else { + ustring ans; + ans.reserve (text.length ()); + ans.assign (b, p); + ++ p; + for (; p < e; ++ p) { + if (! fn (*p)) + ans.append (1, *p); + } + return ans; } } ustring omitCtrl (const ustring& str) { - static uregex re ("[\\x00-\\x1f\\x7f]+"); - return omitPattern (str, re); + return omitPattern (str, iscntrl); +} + +static int iscntrlx (int c) { + static char table_ctrlx[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + }; + if (0 <= c && c < 128) + return table_ctrlx[c]; + return 0; } ustring omitCtrlX (const ustring& str) { - static uregex re ("[^\\x09\\x0a\\x20-\\x7e\\x80-\\xff]+"); - return omitPattern (str, re); + return omitPattern (str, iscntrlx); +} + +static int isNUL (int c) { + return c == 0; } ustring omitNul (const ustring& str) { - static uregex re ("[\\x00]+"); - return omitPattern (str, re); + return omitPattern (str, 
isNUL); +} + +static int iscrlfchar (int c) { + return c == 0x0a || c == 0x0d; } ustring omitNL (const ustring& str) { - return omitPattern (str, re_nl); + return omitPattern (str, iscrlfchar); +} + +static int isnonasciichar (int c) { + return c < 0x20 || c > 0x7e; } ustring omitNonAscii (const ustring& str) { - static uregex re ("[^ -\\x7e]+"); - return omitPattern (str, re); + return omitPattern (str, isnonasciichar); +} + +static int isnonasciiword (int c) { + return c < 0x21 || c > 0x7e; } ustring omitNonAsciiWord (const ustring& str) { - static uregex re ("[^\\x21-\\x7e]+"); - return omitPattern (str, re); + return omitPattern (str, isnonasciiword); } -static ustring percentEncode (uiterator b, uiterator e, const uregex& re) { - // $1 -> _ - // $2 -> %HEX - umatch m; +static ustring percentEncode (Splitter& sp) { ustring ans; - - while (b < e && usearch (b, e, m, re)) { - if (b < m[0].first) - ans.append (b, m[0].first); - if (m[1].matched) { + int c; + while (sp.nextSep ()) { + if (sp.preSize () > 0) + ans.append (sp.pre ()); + c = *sp.matchBegin (); + if (c == '\0') { ans.append (uUScore); - } else if (m[2].matched) { - ans.append (percentHEX (*m[2].first)); } else { - assert (0); + ans.append (percentHEX (c)); } - b = m[0].second; } - if (b < e) - ans.append (b, e); - + if (sp.preSize () > 0) + ans.append (sp.pre ()); return ans; } -ustring percentEncode (uiterator b, uiterator e) { - static uregex re ("(\\x00)|([^A-Za-z0-9_.~-])"); - - return percentEncode (b, e, re); -} - -ustring percentEncode_path (uiterator b, uiterator e) { - static uregex re ("(\\x00)|([^A-Za-z0-9_/.~-])"); - - return percentEncode (b, e, re); -} - -ustring percentEncode (const ustring& str) { - return percentEncode (str.begin (), str.end ()); -} - -ustring percentEncode_path (const ustring& str) { - return percentEncode_path (str.begin (), str.end ()); +static bool findPercentChar (uiterator& b, uiterator e, uiterator& u) { + static char table_percentchar[] = { // 
(\x00)|([^A-Za-z0-9_.~\-]) + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, + }; + int c; + for (; b < e; ++ b) { + c = *b; + if (c < 0 || c >= 128 || table_percentchar[c]) { + u = b + 1; + return true; + } + } + u = e; + return false; } -#if 0 -ustring percentEncode_path (uiterator b, uiterator e) { - uiterator i; - ustring ans; - - for (i = b; i < e; i ++) { - if (*i == '/') { - if (b < i) - ans.append (percentEncode (b, i)); - ans.append (uSlash); - b = i + 1; +ustring percentEncode (uiterator b, uiterator e) { +// static uregex re ("(\\x00)|([^A-Za-z0-9_.~-])"); + SplitterFn sp (b, e, findPercentChar); + return percentEncode (sp); +} + +static bool findPercentPathChar (uiterator& b, uiterator e, uiterator& u) { + static char table_percentpathchar[] = { // (\x00)|([^A-Za-z0-9_\/.~\-]) + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, + }; + int c; + for (; b < e; ++ b) { + c = *b; + if (c < 0 || c >= 128 || table_percentpathchar[c]) { + u = b + 1; + return true; } } - if (b < e) - ans.append (percentEncode (b, e)); - - return ans; + u = e; + return false; } -ustring percentEncode_path (const ustring& str) { - return percentEncode_path (str.begin (), str.end ()); +ustring percentEncode_path (uiterator b, uiterator e) { +// static uregex re ("(\\x00)|([^A-Za-z0-9_/.~-])"); + 
SplitterFn sp (b, e, findPercentPathChar); + return percentEncode (sp); } -#endif ustring percentDecode (const ustring& str) { ustring ans; @@ -330,10 +368,33 @@ ustring percentDecode (const ustring& str) { return fixUTF8 (ans); } -ustring cookieencode (const ustring& text) { - static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])"); +static bool findCookieEncChar (uiterator& b, uiterator e, uiterator& u) { + static char table_cookieencode[] = { // ([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff]) + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + }; + int c; + for (; b < e; ++ b) { + c = *b; + if (c < 0 || c >= 128 || table_cookieencode[c]) { + u = b + 1; + return true; + } + } + u = e; + return false; +} - return percentEncode (text.begin (), text.end (), re); +ustring cookieencode (const ustring& text) { +// static uregex re ("([\\x00-\\x1f\\x7f])|([ ,;%\\x80-\\xff])"); + SplitterFn sp (text.begin (), text.end (), findCookieEncChar); + return percentEncode (sp); } ustring cookiedecode (const ustring& text) { @@ -392,21 +453,40 @@ ustring filePart_osSafe (const ustring& path) { } void split (uiterator b, uiterator e, uregex& re, std::vector& ans) { - Splitter sp (b, e, re); + SplitterRe sp (b, e, re); while (sp.next ()) { - ans.push_back (sp.cur ()); + ans.push_back (sp.pre ()); + } +} + +void split (uiterator b, uiterator e, int ch, std::vector& ans) { + SplitterCh sp (b, e, ch); + + while (sp.next ()) { + ans.push_back (sp.pre ()); } } void splitE (uiterator b, uiterator e, uregex& re, std::vector& ans) { - Splitter sp (b, e, re); + SplitterRe sp (b, e, re); - if (b != e) { + if (b < e) { while (sp.nextSep ()) { - 
ans.push_back (sp.cur ()); + ans.push_back (sp.pre ()); } - ans.push_back (ustring (sp.begin (), sp.eol ())); + ans.push_back (sp.pre ()); + } +} + +void splitE (uiterator b, uiterator e, int ch, std::vector& ans) { + SplitterCh sp (b, e, ch); + + if (b < e) { + while (sp.nextSep ()) { + ans.push_back (sp.pre ()); + } + ans.push_back (sp.pre ()); } } @@ -634,7 +714,7 @@ ustring jsEncode (const ustring& str) { ustring filenameEncode (const ustring& text) { static uregex re ("([\\x00-\\x1f\\x7f])|([^a-zA-Z0-9._-])|(^\\.+)"); - Splitter sp (text, re); + SplitterRe sp (text, re); ustring ans; int c; @@ -664,7 +744,7 @@ ustring filenameEncode (const ustring& text) { ustring filenameDecode (const ustring& text) { static uregex re (":([0-9a-fA-F][0-9a-fA-F])"); - Splitter sp (text, re); + SplitterRe sp (text, re); ustring ans; int c; @@ -810,14 +890,16 @@ uint32_t hextoul (uiterator b, uiterator e) { ustring toCRLF (const ustring& str) { uiterator b = str.begin (); uiterator e = str.end (); - umatch m; + uiterator p; ustring ans; - while (usearch (b, e, m, re_lf)) { - ans.append (b, m[0].first).append (uCRLF); - b = m[0].second; + p = b; + while (findChar (b, e, '\n')) { + ans.append (p, b).append (uCRLF); + p = ++ b; } - ans.append (b, e); + if (p < e) + ans.append (p, e); return ans; } @@ -884,18 +966,14 @@ static ustring colpad0 (int n, const ustring& src) { ${W}, ${w} ${o} */ -//ustring formatDateString (const ustring& format, time_t tm) { ustring formatDateString (const ustring& format, struct tm& v) { ustring ans; -// struct tm v; uiterator b, e; umatch m; int pc; -// static uregex re ("\\$\\{([YMDhmsWw])(:([0-9]))?\\}"); static uregex re ("\\$\\{(([YMDhmsWwo])(:([0-9]))?|M:((name)|(ab)|(abname)))\\}"); std::vector fpar; -// localtime_r (&tm, &v); b = format.begin (); e = format.end (); while (usearch (b, e, m, re)) { @@ -908,14 +986,11 @@ ustring formatDateString (const ustring& format, struct tm& v) { ans.append (MStr_a[v.tm_mon]); } } else { -// if (m[2].matched) 
{ if (m[3].matched) { -// pc = strtol (ustring (m[3].first, m[3].second)); pc = strtol (ustring (m[4].first, m[4].second)); } else { pc = 0; } -// switch (*m[1].first) { switch (*m[2].first) { case 'Y': ans.append (colpad0 (pc, to_ustring (v.tm_year + 1900))); @@ -1006,3 +1081,119 @@ ustring octchar (int c) { ans[0] = (c & 0x3) + '0'; return ans; } + +bool findNL (uiterator& b, uiterator e, uiterator& u) { + for (; b < e; ++ b) { + if (*b == '\n') { + u = b + 1; + return true; + } else if (*b == '\r') { + u = b + 1; + if (u < e && *u == '\n') + ++ u; + return true; + } + } + u = e; + return false; +} + +bool findNLb (uiterator& b, uiterator e) { + for (; b < e; ++ b) { + if (*b == '\n') { + ++ b; + return true; + } else if (*b == '\r') { + ++ b; + if (b < e && *b == '\n') + ++ b; + return true; + } + } + return false; +} + +bool findChar (uiterator& b, uiterator e, int ch) { + for (; b < e; ++ b) { + if (*b == ch) { + return true; + } + } + return false; +} + +bool findChars (uiterator& b, uiterator e, const ustring& pattern) { + for (; b < e; ++ b) { + if (pattern.find (*b) != ustring::npos) { + return true; + } + } + return false; +} + +bool findCharFn (uiterator& b, uiterator e, bool (*fn)(int)) { + for (; b < e; ++ b) { + if (fn (*b)) + return true; + } + return false; +} + +bool findSepColon (uiterator& b, uiterator e, uiterator& u) { + // " *; *"を探索する。bは進む + uiterator p = b; + if (findChar (b, e, ';')) { + u = b + 1; + while (p < b && *(b - 1) == ' ') + -- b; + while (u < e && *u == ' ') + ++ u; + return true; + } + u = e; + return false; +} + +bool matchHeadFn (uiterator& b, uiterator e, bool (*fn)(int)) { + if (b < e && fn (*b)) { + do { + ++ b; + } while (b < e && fn (*b)); + return true; + } + return false; +} + +bool matchWordTbl (uiterator b, uiterator e, char* tbl) { + int c; + if (b < e) { + do { + c = *b; + if (0 <= c && c < 128 && tbl[c]) { // 128〜はfalse + } else { + return false; + } + ++ b; + } while (b < e); + return true; + } else { + return 
false; + } +} + +bool matchWordFn (uiterator b, uiterator e, bool (*fn)(int)) { + int c; + if (b < e) { + do { + c = *b; + if (0 <= c && c < 128 && fn (c)) { + } else { + return false; + } + ++ b; + } while (b < e); + return true; + } else { + return false; + } +} diff --git a/lib/util_string.h b/lib/util_string.h index 9f7f8e9..e0207b3 100644 --- a/lib/util_string.h +++ b/lib/util_string.h @@ -2,6 +2,7 @@ #define UTIL_STRING_H #include "ustring.h" +#include "util_regex.h" #include #include "iconv_glue.h" #include @@ -51,7 +52,6 @@ inline uint32_t to_uint32 (const ustring& v) { } ustring percentHEX (int c); ustring urldecode_nonul (const ustring& str); -ustring omitPattern (const ustring& text, uregex& re); ustring omitCtrl (const ustring& str); ustring omitCtrlX (const ustring& str); ustring omitNul (const ustring& str); @@ -59,9 +59,13 @@ ustring omitNL (const ustring& str); ustring omitNonAscii (const ustring& str); ustring omitNonAsciiWord (const ustring& str); ustring percentEncode (uiterator b, uiterator e); -ustring percentEncode (const ustring& str); +inline ustring percentEncode (const ustring& str) { + return percentEncode (str.begin (), str.end ()); +} ustring percentEncode_path (uiterator b, uiterator e); -ustring percentEncode_path (const ustring& str); +inline ustring percentEncode_path (const ustring& str) { + return percentEncode_path (str.begin (), str.end ()); +} ustring percentDecode (const ustring& str); ustring cookieencode (const ustring& text); ustring cookiedecode (const ustring& text); @@ -69,7 +73,9 @@ ustring clipColon (const ustring& text); ustring dirPart (const ustring& path); ustring filePart_osSafe (const ustring& path); void split (uiterator b, uiterator e, uregex& re, std::vector& ans); +void split (uiterator b, uiterator e, int ch, std::vector& ans); void splitE (uiterator b, uiterator e, uregex& re, std::vector& ans); +void splitE (uiterator b, uiterator e, int ch, std::vector& ans); bool splitChar (uiterator b, uiterator e, 
uiterator::value_type ch, uiterator& m1); ustring filenameEncode (const ustring& text); ustring filenameDecode (const ustring& text); @@ -112,4 +118,14 @@ ustring hexEncode (const ustring& data); int octchar (uiterator b); ustring octchar (int c); +bool findNL (uiterator& b, uiterator e, uiterator& u); +bool findNLb (uiterator& b, uiterator e); +bool findChar (uiterator& b, uiterator e, int ch); +bool findChars (uiterator& b, uiterator e, const ustring& pattern); +bool findCharFn (uiterator& b, uiterator e, bool (*fn)(int)); +bool findSepColon (uiterator& b, uiterator e, uiterator& u); +bool matchHeadFn (uiterator& b, uiterator e, bool (*fn)(int)); +bool matchWordTbl (uiterator b, uiterator e, char* tbl); +bool matchWordFn (uiterator b, uiterator e, bool (*fn)(int)); + #endif /* UTIL_STRING_H */ diff --git a/lib/util_tcp.cc b/lib/util_tcp.cc index a4e6bd0..4bba0fd 100644 --- a/lib/util_tcp.cc +++ b/lib/util_tcp.cc @@ -570,7 +570,7 @@ ssize_t SslClient::read2 (void* buf, size_t nbytes) { bool ProxySslClient::connect2 () { ustring msg; - if (checkHostname (ephost->host)) { + if (matchHostname (ephost->host)) { msg.assign (CharConst ("CONNECT ")); msg.append (ephost->host).append (uColon).append (to_ustring (ephost->port)).append (CharConst (" HTTP/1.0" kCRLF)); msg.append (CharConst ("Host: ")).append (ephost->host).append (uCRLF); diff --git a/lib/util_tcp.h b/lib/util_tcp.h index acd1f65..bba5bd0 100644 --- a/lib/util_tcp.h +++ b/lib/util_tcp.h @@ -2,6 +2,7 @@ #define UTIL_TCP_H #include "ustring.h" +#include #include #include #include diff --git a/modules/ml-addon.cc b/modules/ml-addon.cc index 33d2b92..d106da8 100644 --- a/modules/ml-addon.cc +++ b/modules/ml-addon.cc @@ -9,6 +9,7 @@ #include "util_const.h" #include "util_check.h" #include "util_string.h" +#include "util_splitter.h" #include "util_proc.h" #include "ustring.h" #include "expr.h" @@ -67,7 +68,15 @@ public: ustring u; while (rest) { u = eval_str (rest->car (), mlenv); - u = regex_replace (u, re_nl, 
uSPC, boost::regex_constants::match_single_line); + { + ustring ans; + SplitterNL sp (u); + while (sp.nextSep ()) { + ans.append (sp.pre ()).append (uSPC); + } + ans.append (sp.pre ()); + u = ans; + } par.push_back (u); nextNode (rest); } @@ -104,7 +113,7 @@ public: cmd = eval_str (params[0], mlenv); type = eval_asciiword (keywords[1], mlenv); evkw_bool (2, fContinue); - if (! checkMimeType (type)) + if (! matchMimeType (type)) type.resize (0); }; }; @@ -116,7 +125,7 @@ static void addon_sub2 (MlEnv* mlenv, AddonParams& o) { if (o.cmd.size () == 0) throw (uErrorCmdNameEmpty); - if (! checkFilename (o.cmd)) + if (! matchFilename (o.cmd)) throw (o.cmd + uErrorBadCmd); { @@ -173,20 +182,25 @@ MNode* ml_addon_tab (MNode* cell, MlEnv* mlenv) { addon_sub2 (mlenv, o); { ustring a; - uiterator b, e; - umatch m; + uiterator b, e, p; +// umatch m; MNodeList ans; o.proc.read (a); a = fixUTF8 (a); b = a.begin (); e = a.end (); - while (usearch (b, e, m, re_tab)) { - ans.append (newMNode_str (new ustring (b, m[0].first))); - b = m[0].second; +// while (usearch (b, e, m, re_tab)) { +// ans.append (newMNode_str (new ustring (b, m[0].first))); +// b = m[0].second; +// } + p = b; + while (findChar (b, e, '\t')) { + ans.append (newMNode_str (new ustring (p, b))); + p = ++ b; } - if (b != e) { - ans.append (newMNode_str (new ustring (b, e))); + if (p < e) { + ans.append (newMNode_str (new ustring (p, e))); } return ans.release (); } @@ -216,8 +230,8 @@ MNode* ml_addon_array_tab_nl (MNode* cell, MlEnv* mlenv) { addon_sub2 (mlenv, o); { ustring a, u; - uiterator b, e; - umatch m, m2; + uiterator b, e, p, p2; +// umatch m, m2; size_t n = 0; int i; @@ -225,36 +239,50 @@ MNode* ml_addon_array_tab_nl (MNode* cell, MlEnv* mlenv) { a = fixUTF8 (a); b = a.begin (); e = a.end (); - while (usearch (b, e, m, re_lf)) { + p = b; +// while (usearch (b, e, m, re_lf)) { + while (findChar (b, e, '\n')) { n ++; i = 0; - while (i < vars.size () && usearch (b, m[0].first, m2, re_tab)) { - h = 
newMNode_str (new ustring (b, m2[0].first)); +// while (i < vars.size () && usearch (b, m[0].first, m2, re_tab)) { +// h = newMNode_str (new ustring (b, m2[0].first)); + p2 = p; + while (i < vars.size () && findChar (p, b, '\t')) { + h = newMNode_str (new ustring (p2, p)); mlenv->setAry (vars[i], n, h.p); i ++; - b = m2[0].second; +// b = m2[0].second; + p2 = ++ p; } - if (i < vars.size () && b != m[0].first) { - h = newMNode_str (new ustring (b, m[0].first)); +// if (i < vars.size () && b != m[0].first) { +// h = newMNode_str (new ustring (b, m[0].first)); + if (i < vars.size () && p < b) { + h = newMNode_str (new ustring (p, b)); mlenv->setAry (vars[i], n, h.p); i ++; } for (; i < vars.size (); i ++) { mlenv->setAry (vars[i], n, NULL); } - b = m[0].second; +// b = m[0].second; + p = ++ b; } - if (b != e) { +// if (b != e) { + if (p < e) { n ++; i = 0; - while (i < vars.size () && usearch (b, e, m2, re_tab)) { - h = newMNode_str (new ustring (b, m2[0].first)); +// while (i < vars.size () && usearch (b, e, m2, re_tab)) { +// h = newMNode_str (new ustring (b, m2[0].first)); + p2 = p; + while (i < vars.size () && findChar (p, e, '\t')) { + h = newMNode_str (new ustring (p2, p)); mlenv->setAry (vars[i], n, h.p); i ++; - b = m2[0].second; +// b = m2[0].second; + p2 = ++ p; } - if (i < vars.size () && b != e) { - h = newMNode_str (new ustring (b, e)); + if (i < vars.size () && p < e) { + h = newMNode_str (new ustring (p, e)); mlenv->setAry (vars[i], n, h.p); i ++; } diff --git a/modules/ml-apache.cc b/modules/ml-apache.cc index 9502ab0..abb3f89 100644 --- a/modules/ml-apache.cc +++ b/modules/ml-apache.cc @@ -183,7 +183,7 @@ MNode* ml_get_http_header (MNode* cell, MlEnv* mlenv) { } } - if (checkAlNum (name.begin (), e) && name.length () < 128) { + if (matchAlNum (name.begin (), e) && name.length () < 128) { name = ustring (CharConst ("HTTP_")).append (name); char* e = getenv (name.c_str ()); if (e) { diff --git a/modules/ml-config.cc b/modules/ml-config.cc index 
64b488f..ba63b2b 100644 --- a/modules/ml-config.cc +++ b/modules/ml-config.cc @@ -44,7 +44,7 @@ MNode* ml_datastore_list (MNode* cell, MlEnv* mlenv) { name.assign (de->d_name); #endif t.assign (CharConst (cDataTop kDS)).append (name).append (CharConst (kSubStore)); - if (checkName (name) && isDirectory (t)) { + if (matchName (name) && isDirectory (t)) { ans.append (newMNode_str (new ustring (name))); } } diff --git a/modules/ml-cookielogin.cc b/modules/ml-cookielogin.cc index ed8bd9f..adc7bc8 100644 --- a/modules/ml-cookielogin.cc +++ b/modules/ml-cookielogin.cc @@ -9,6 +9,7 @@ #include "util_check.h" #include "util_random.h" #include "util_string.h" +#include "util_splitter.h" #include "util_time.h" #include "sigsafe.h" #include "bdbmacro.h" @@ -27,52 +28,18 @@ */ static void splitRec (ustring& rec, ustring& id, ustring& limit, ustring& avail, ustring& group, ustring& ip) { - uiterator b = rec.begin (); - uiterator e = rec.end (); - umatch m; - - if (b == e || ! usearch (b, e, m, re_colon)) { - id.assign (b, e); - goto Ex2; - } - id.assign (b, m[0].first); - b = m[0].second; - if (b == e || ! usearch (b, e, m, re_colon)) { - limit.assign (b, e); - goto Ex3; - } - limit.assign (b, m[0].first); - b = m[0].second; - if (b == e || ! usearch (b, e, m, re_colon)) { - avail.assign (b, e); - goto Ex4; - } - avail.assign (b, m[0].first); - b = m[0].second; - if (b == e || ! usearch (b, e, m, re_colon)) { - group.assign (b, e); - goto Ex5; - } - group.assign (b, m[0].first); - b = m[0].second; - if (b == e || ! 
usearch (b, e, m, re_colon)) { - ip.assign (b, e); - } else { - ip.assign (b, m[0].first); - } - return; - -// Ex1: -// id.resize (0); - Ex2: - limit.resize (0); - Ex3: - avail.resize (0); - Ex4: - group.resize (0); - Ex5: - ip.resize (0); - + SplitterCh sp (rec, ':'); + + sp.nextSep (); + id = sp.pre (); + sp.nextSep (); + limit = sp.pre (); + sp.nextSep (); + avail = sp.pre (); + sp.nextSep (); + group = sp.pre (); + sp.nextSep (); + ip = sp.pre (); return; } @@ -151,7 +118,7 @@ MNode* ml_cookielogin (MNode* cell, MlEnv* mlenv) { if (name.size () == 0) throw (uErrorFilenameEmpty); - if (! checkName (name)) + if (! matchName (name)) throw (name + uErrorBadName); if (mlenv->env) { SigSafe sig; @@ -236,7 +203,7 @@ MNode* ml_cookielogin_login (MNode* cell, MlEnv* mlenv, MLFunc* mobj) { r.append (clipColon (id)).append (uColon); limit = now () + avail; r.append (to_ustring (limit)).append (uColon).append (to_ustring (avail)).append (uColon).append (clipColon (group)); - if (ip.size () > 0 && checkIP (ip)) { + if (ip.size () > 0 && matchIP (ip)) { r.append (uColon).append (ip); // IP } else { r.append (uColon); @@ -377,7 +344,6 @@ MNode* ml_cookielogin_delete (MNode* cell, MlEnv* mlenv, MLFunc* mobj) { MNode* arg = cell->cdr (); MLCookieLogin* obj = MObjRef (mobj, cMLCookieLoginID); ustring key, val; - umatch m; ustring id; std::vector keys; std::vector::iterator it; @@ -392,8 +358,9 @@ MNode* ml_cookielogin_delete (MNode* cell, MlEnv* mlenv, MLFunc* mobj) { obj->opendb (); obj->db.initeach (); while (obj->db.each (key, val)) { - if (usearch (val, m, re_colon)) { - if (match (val.begin (), m[0].first, id)) { + SplitterCh sp (val, ':'); + if (sp.nextSep ()) { // セパレータが存在するとき、idを含むレコード + if (sp.pre () == id) { keys.push_back (key); } } diff --git a/modules/ml-formvar.cc b/modules/ml-formvar.cc index bda1bb3..a030d81 100644 --- a/modules/ml-formvar.cc +++ b/modules/ml-formvar.cc @@ -343,7 +343,6 @@ MNode* ml_formvar_input_file (MNode* cell, MlEnv* mlenv) { ustring var; 
ustring fname; FormVarOp opt; -// ustring val; ustring tgt; ustring filename; int idx; @@ -384,14 +383,10 @@ MNode* ml_formvar_input_file (MNode* cell, MlEnv* mlenv) { if (mlenv->env->storedir.empty ()) { newStoreSerial (mlenv); } -// mlenv->env->form->fileAt (var, val); -// if (val.size () > 0) { -// i = strtoul (val); idx = mlenv->env->form->at (var, filename); idx = mlenv->env->form->partAt (idx); if (idx >= 0) { tgt = mlenv->env->path_store_file (fname); -// mlenv->env->form->at (var, filename); if (opt.filter.size () > 0) { if (wsearch_env (mlenv->regenv, filename, opt.filter)) { filename.assign (mlenv->regenv.regmatch[0].first, mlenv->regenv.regmatch[0].second); @@ -438,7 +433,7 @@ MNode* ml_formvar_input_file (MNode* cell, MlEnv* mlenv) { /*DOC: ===input-file@=== - (input-file@ ARRAY FILE_PREFIX) -> LIST_of_a_Pair_of_SavedFileName_and_OriginalFileName + (input-file@ ARRAY FILE_PREFIX) -> LIST_of_a_List_of_SavedFileName_OriginalFileName_and_Type */ //#AFUNC input-file@ ml_formvar_input_file_a @@ -453,7 +448,6 @@ MNode* ml_formvar_input_file_a (MNode* cell, MlEnv* mlenv) { size_t i, n; int idx; ustring tgt; -// ustring val; ustring si; ustring filename; ustring tgtname; @@ -461,18 +455,14 @@ MNode* ml_formvar_input_file_a (MNode* cell, MlEnv* mlenv) { setParams (arg, 2, ¶ms, NULL, NULL, NULL); name = eval_str (params[0], mlenv); prefix = eval_str (params[1], mlenv); -// prefix.append (uDash); if (mlenv->env->storedir.empty ()) { newStoreSerial (mlenv); } target0 = mlenv->env->path_store_file (prefix); -// n = mlenv->env->form->fileAtSize (name); n = mlenv->env->form->atSize (name); for (i = 0; i < n; i ++) { -// mlenv->env->form->fileAt (name, i, val); -// if (val.size () > 0) { idx = mlenv->env->form->at (name, i, filename); #ifdef DEBUG2 std::cerr << "i:" << idx << " "; @@ -485,13 +475,13 @@ MNode* ml_formvar_input_file_a (MNode* cell, MlEnv* mlenv) { si.assign (to_ustring (i + 1)); tgt.assign (target0).append (si); tgtname.assign (prefix).append (si); -// 
mlenv->env->form->at (name, i, filename); // ++ filter if (filename.length () > 0) { MNodeList l; mlenv->env->form->saveFile (idx, tgt, 0); // ++ opt.max l.append (newMNode_str (new ustring (tgtname))); l.append (newMNode_str (new ustring (filename))); + l.append (newMNode_str (new ustring (mlenv->env->form->typeAt (idx)))); ans.append (l.release ()); #ifdef DEBUG if (mlenv->log) { diff --git a/modules/ml-http.cc b/modules/ml-http.cc index 6409ebb..ac031d0 100644 --- a/modules/ml-http.cc +++ b/modules/ml-http.cc @@ -413,7 +413,7 @@ MNode* ml_build_url (MNode* cell, MlEnv* mlenv) { if (! checkScheme (scheme)) throw (scheme + ": bad scheme."); - if (! checkHostname (host)) + if (! matchHostname (host)) throw (host + ": bad hostname."); ans = new ustring; @@ -580,7 +580,7 @@ MNode* ml_hostnamep (MNode* cell, MlEnv* mlenv) { if (arg) throw (uErrorWrongNumber); - return newMNode_bool (checkHostname (hostname)); + return newMNode_bool (matchHostname (hostname)); } /*DOC: @@ -725,7 +725,7 @@ MNode* ml_http_get (MNode* cell, MlEnv* mlenv) { obj.http->rawquery = to_string (t ()); if (evkw (26, t)) { obj.http->querytype = to_string (t ()); - if (!checkASCII (obj.http->querytype)) + if (! matchASCII (obj.http->querytype.begin (), obj.http->querytype.end ())) throw (obj.http->querytype + ustring (CharConst (": bad type"))); } if (evkw (27, t)) // raw-file-serial diff --git a/modules/ml-motor.cc b/modules/ml-motor.cc index 347f706..d485e2a 100644 --- a/modules/ml-motor.cc +++ b/modules/ml-motor.cc @@ -52,7 +52,7 @@ MNode* ml_output_header (MNode* cell, MlEnv* mlenv) { if (type.empty ()) throw (ustring (CharConst ("missing type."))); - else if (! checkMimeType (type)) + else if (! matchMimeType (type)) type = mimetype (type); if (! mlenv->env->responseDone) { @@ -107,7 +107,7 @@ MNode* ml_motor_file (MNode* cell, MlEnv* mlenv) { #endif if (evkw (0, t)) { // type type = t.to_asciiword (); - if (! checkMimeType (type)) + if (! 
matchMimeType (type)) type.resize (0); } if (keywords[1]) // error diff --git a/modules/ml-neon.cc b/modules/ml-neon.cc index 942bfe3..a4e9246 100644 --- a/modules/ml-neon.cc +++ b/modules/ml-neon.cc @@ -330,7 +330,7 @@ void NeonSession::setNoVerify () { } void NeonSession::setProxy (const ustring& host, int port) { - if (checkHostname (host) && port > 0 && port < 65536) { + if (matchHostname (host) && port > 0 && port < 65536) { #ifdef DEBUG2 std::cerr << "set proxy " << host << ":" << port << "\n"; #endif /* DEBUG */ @@ -813,7 +813,7 @@ MNode* ml_neon (MNode* cell, MlEnv* mlenv) { evkw (4, errfn); // 4:on-error evkw_bool (5, obj.fnoverify); // 5:no-verify - if (! checkHostname (obj.host)) + if (! matchHostname (obj.host)) throw (obj.host + ": bad hostname."); if (obj.port <= 0 || obj.port >= 65536) throw (to_ustring (obj.port) + ": bad port number."); @@ -994,7 +994,7 @@ MNode* ml_neon_http_request (MNode* cell, MlEnv* mlenv, MLFunc* mobj) { obj->query->rawquery.srcStatic (to_string (t ())); if (evkw (16, t)) { // 16:query-type obj->query->querytype = to_string (t ()); - if (!checkASCII (obj->query->querytype)) + if (! 
matchASCII (obj->query->querytype.begin (), obj->query->querytype.end ())) throw (obj->query->querytype + ustring (CharConst (": bad type"))); } evkw (17, obj->query->cookie); // 17:cookie diff --git a/modules/ml-sendmail.cc b/modules/ml-sendmail.cc index 6008357..19e82a8 100644 --- a/modules/ml-sendmail.cc +++ b/modules/ml-sendmail.cc @@ -16,6 +16,7 @@ #include "util_check.h" #include "util_file.h" #include "util_string.h" +#include "util_splitter.h" #include "util_proc.h" #include "util_time.h" #include "ustring.h" @@ -69,8 +70,6 @@ static void sendmail (const ustring& text, const ustring& faddr, std::vectorenv); } - b = out.ans.begin (); - e = out.ans.end (); + SplitterNL sp (out.ans); line.resize (0); - while (usearch (b, e, m, re_nl)) { - if (b == m[0].first) { - b = m[0].second; + while (sp.next ()) { + if (sp.b == sp.t) { break; } - if (usearch (b, m[0].first, m2, re_white)) { + if (*sp.b == ' ' || *sp.b == '\t') { line.append (uLF); - line.append (b, m[0].first); + line.append (sp.pre ()); } else { if (line.size () > 0) hdr.line (line); - line = ustring (b, m[0].first); + line = ustring (sp.pre ()); } - b = m[0].second; } if (line.size () > 0) hdr.line (line); @@ -153,15 +148,10 @@ static void sendmail (const ustring& text, const ustring& faddr, std::vector& args, MlEnv* mlenv) { } } +#if 0 /*DOC: ===doarray=== [[doarray:VARIABLE,...[:VARIABLE] TEXT...]] @@ -217,3 +218,4 @@ void mf_doarray (const std::vector& args, const MotorObj::MotorObjVec& //**** } +#endif diff --git a/wiki/wikiattrib.cc b/wiki/wikiattrib.cc index 1ae61ad..f6b5b09 100644 --- a/wiki/wikiattrib.cc +++ b/wiki/wikiattrib.cc @@ -266,7 +266,7 @@ bool WikiAttrib1::paramID (const ustring& key, WikiMotorObjVec& vval, bool& fer void WikiAttrib1::paramIDValue (const ustring& key, WikiMotorObjVec& vval, ustring& var, bool& ferr) { ustring value (vval.textOut (wiki)); - if (checkWikiID (value)) { + if (matchWikiID (value)) { var = value; ferr = false; } else { @@ -291,7 +291,7 @@ void 
WikiAttrib1::paramClassValue (WikiMotorObjVec& vval, std::vector& for (int i = 0; i < args.size (); i ++) { ustring value (args[i]->textOut (wiki)); if (value.length () > 0) { - if (checkWikiID (value)) { + if (matchWikiID (value)) { var.push_back (value); } else { wiki->errorMsg.append (value).append (CharConst (": bad class name\n")); @@ -321,7 +321,7 @@ bool WikiAttrib1::paramHeight (const ustring& key, WikiMotorObjVec& vval, ustri void WikiAttrib1::paramWidthValue (const ustring& key, WikiMotorObjVec& vval, ustring& var, bool& ferr) { ustring value (vval.textOut (wiki)); - if (checkWidth (value)) { + if (matchWidth (value)) { var = value; ferr = false; } else { @@ -333,7 +333,7 @@ void WikiAttrib1::paramWidthValue (const ustring& key, WikiMotorObjVec& vval, u bool WikiAttrib1::paramSize (const char* name, size_t namelen, const ustring& key, WikiMotorObjVec& vval, ustring& var, bool& ferr) { if (match (key, name, namelen)) { ustring value (vval.textOut (wiki)); - if (checkNum (value)) { + if (matchNum (value)) { var = value; ferr = false; } else { @@ -345,7 +345,7 @@ bool WikiAttrib1::paramSize (const char* name, size_t namelen, const ustring& k } void WikiAttrib1::paramUNum (const ustring& value, int& var, const ustring& name) { - if (checkNum (value)) { + if (matchNum (value)) { var = strtoul (value); } else { wiki->errorMsg.append (name).append (uEq).append (value).append (uErrorBadValue).append (uLF); @@ -365,7 +365,7 @@ bool WikiAttrib1::paramTargetCheck (const ustring& key) { } void WikiAttrib1::paramTargetBody (const ustring& key, const ustring& value, ustring& var, bool& ferr) { - if (value.length () == 0 || checkWikiID (value)) { + if (value.length () == 0 || matchWikiID (value)) { var = value; } else { if (key.length () > 0) @@ -394,9 +394,9 @@ bool WikiAttrib1::paramOnChangeCheck (const ustring& name) { #ifdef BOOTSTRAPHACK bool WikiAttrib1::paramDataPrefix (const ustring& key, WikiMotorObjVec& vval, bool& ferr) { - if (matchHead (key, CharConst 
("data-")) && checkWikiID (key)) { + if (matchHead (key, CharConst ("data-")) && matchWikiID (key)) { ustring value (vval.textOut (wiki)); - if (checkWikiID (value)) { + if (matchWikiID (value)) { datapre.push_back (std::pair (key, value)); ferr = false; } else { @@ -582,10 +582,10 @@ void WikiAttribTable::outputMore (MotorOutput* out) { /* ============================================================ */ bool WikiAttribImg::readAttribMore (const ustring& key, WikiMotorObjVec& vval, bool& ferr) { if (paramWidth (key, vval, width, ferr)) { - if (checkNum (width)) + if (matchNum (width)) width.append (CharConst ("px")); } else if (paramHeight (key, vval, height, ferr)) { - if (checkNum (height)) + if (matchNum (height)) height.append (CharConst ("px")); } else if (match (key, CharConst ("alt"))) { alt = vval.textOut (wiki); @@ -616,7 +616,7 @@ bool WikiAttribInput::readAttribMore (const ustring& key, WikiMotorObjVec& vval ustring v = vval.textOut (wiki); if (match (v, CharConst ("*"))) { elsize = 1; - } else if (checkNum (v)) { + } else if (matchNum (v)) { elsize = to_int32 (v); if (elsize < 0 || elsize > 999) { elsize = 1; @@ -679,7 +679,7 @@ void WikiAttribInput::outputMore (MotorOutput* out) { wiki->outputName (out, CharConst ("size"), psize); wiki->outputName (out, CharConst ("size"), elsize); if (pwidth.size () > 0) { - if (checkNum (pwidth)) { + if (matchNum (pwidth)) { out->out_raw (CharConst (" style=\"width:"))->out_toHTML_noCtrl (pwidth)->out_raw (CharConst ("px;\"")); } else { out->out_raw (CharConst (" style=\"width:"))->out_toHTML_noCtrl (pwidth)->out_raw (CharConst (";\"")); diff --git a/wiki/wikicmd.cc b/wiki/wikicmd.cc index 2916aa9..03287d9 100644 --- a/wiki/wikicmd.cc +++ b/wiki/wikicmd.cc @@ -389,6 +389,16 @@ void wc_evalblock (WikiLine* wl, WikiFormat* wiki) { } /* ============================================================ */ +static bool matchSkipEqs (uiterator& b, uiterator e) { + if (b < e && *b == '=') { + do { + ++ b; + } while (b < e && *b 
== '='); + return true; + } + return false; +} + /*DOC: ===$insert=== $insert:VARIABLE @@ -411,8 +421,6 @@ void wc_insert (WikiLine* wl, WikiFormat* wiki) { bool super = false; bool protect; int i; - static uregex re_eq ("=+"); - umatch m; #ifdef DEBUG std::cerr << "(wiki):" << ustring (wl->begin0, wl->end) << "\n"; @@ -427,14 +435,19 @@ void wc_insert (WikiLine* wl, WikiFormat* wiki) { for (i = 1; i < args.size (); i ++) { if (! protect && match (args[i], CharConst ("superuser"))) { super = true; - } else if (usearch (args[i], m, re_eq)) { - hn = m[0].second - m[0].first; - if (hn < 0) - hn = 0; - if (hn > 5) - hn = 5; } else { - // bad parameter + uiterator b = args[i].begin (); + uiterator e = args[i].end (); + uiterator p = b; + if (matchSkipEqs (b, e)) { + hn = b - p; + if (hn < 0) + hn = 0; + if (hn > 5) + hn = 5; + } else { + // bad parameter + } } } if (args.size () >= 1) { diff --git a/wiki/wikienv.cc b/wiki/wikienv.cc index 74756db..43023ab 100644 --- a/wiki/wikienv.cc +++ b/wiki/wikienv.cc @@ -3,8 +3,6 @@ #include "ml.h" #include "ustring.h" -uregex re_wikicmdsep ("(:)|([ \t]+$)"); - void MacroVar::setVar (const ustring& name, MNode* var, WikiLine::linevec* wl) { std::pair x; erase (name); diff --git a/wiki/wikienv.h b/wiki/wikienv.h index 0dbbbac..cf95822 100644 --- a/wiki/wikienv.h +++ b/wiki/wikienv.h @@ -27,6 +27,4 @@ class WikiEnv { virtual ~WikiEnv () {}; }; -extern uregex re_wikicmdsep; - #endif /* WIKIENV_H */ diff --git a/wiki/wikiformat.cc b/wiki/wikiformat.cc index 43435c7..1530747 100644 --- a/wiki/wikiformat.cc +++ b/wiki/wikiformat.cc @@ -1536,29 +1536,56 @@ void WikiBlockRaw::output (MotorOutput* out) { /* ============================================================ */ void WikiFormat::pass1 (const ustring& text, WikiLine::linevec* block, bool fsuper) { - Splitter sp (text, re_nl); + SplitterNL sp (text); pass1_1 (sp, NULL, NULL, block, NULL, NULL, NULL, fsuper); } +static bool findCmdSep (uiterator& b, uiterator e, uiterator& u) { + int 
c; + uiterator p = b; + for (; p < e; ++ p) { + c = *p; + if (c == ':') { + b = p; + u = b + 1; + return true; + } else if (c == ' ' || c == '\t') { + b = p; + u = b; + do { + ++ u; + } while (u < e && ((c = *u) == ' ' || c == '\t')); + return true; + } + } + b = p; + u = e; + return true; +} + int WikiFormat::pass1_1 (Splitter& sp, ustring* elseword, ustring* endword, WikiLine::linevec* block, uiterator* elsebegin0, uiterator* elsebegin, uiterator* elseend, bool fsuper) { uiterator b, e, t, u, v; - umatch m; +// umatch m; +// static uregex re_wikicmdsep ("(:)|([ \t]+$)"); while (sp.next ()) { b = sp.begin (); e = sp.end (); - while (b < e && b[0] == '\t') + while (b < e && b[0] == '\t') // ignore leading TABs b ++; if (matchSkip (b, e, CharConst (kComment))) { // comment } else if (b != e && b[0] == kWikiCmd) { - if (usearch (b, e, m, re_wikicmdsep)) { - t = b; - u = m[0].first; - v = m[0].second; +// if (usearch (b, e, m, re_wikicmdsep)) { +// t = b; +// u = m[0].first; +// v = m[0].second; + t = b; + u = b; + if (findCmdSep (u, e, v)) { } else { - t = b; +// t = b; u = e; v = e; } @@ -1867,7 +1894,6 @@ void WikiFormat::compileLine (WikiLineScanner& scanner) { blockp->push_back (cur); cur->addLine (b, e); push_block (&obj->block); -// } else if (curform == NULL && matchHead (b, e, CharConst ("{form:"))) { } else if (curform == NULL && matchHead (b, e, CharConst (uWikiFORM))) { WikiBlockComplex* obj; if (cur) diff --git a/wiki/wikiline.cc b/wiki/wikiline.cc index 576df63..37244d4 100644 --- a/wiki/wikiline.cc +++ b/wiki/wikiline.cc @@ -378,6 +378,33 @@ bool wl_color (WikiMotorObjVecVec* args, WikiMotorObjVec* arg2, WikiMotorObjVec return true; } +// Returns true iff [b, e) is non-empty and every character is in [a-zA-Z0-9_-]. +// Empty input is rejected, as the regex ^[a-zA-Z0-9_-]+$ that this helper replaces did. +static bool matchAnchor (uiterator b, uiterator e) { + int c; + static const char table_anchor[] = { // [a-zA-Z0-9_\-] + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + }; + if (b == e) + return false; + for (; b < e; ++ b) { + c = *b; + // the range check keeps the index inside the 128-entry table + if (0 <= c && c < 128 && table_anchor[c]) { + } else { + return false; + } + } + return true; +} + /*DOC: ===アンカー=== [[anchor:Fig1]] @@ -386,11 +413,9 @@ bool wl_color (WikiMotorObjVecVec* args, WikiMotorObjVec* arg2, WikiMotorObjVec //#WIKILINE anchor wl_anchor bool wl_anchor (WikiMotorObjVec* arg, WikiMotorObjVec& out, WikiFormat* wiki) { ustring name (arg->textOut (wiki)); - umatch m; - static uregex re ("^[a-zA-Z0-9_-]+$"); MotorOutputString html; - if (! usearch (name, m, re)) + if (! matchAnchor (name.begin (), name.end ())) return false; html.out_raw (CharConst ("outputName (&html, CharConst ("name"), name, false); diff --git a/wiki/wikimotor.cc b/wiki/wikimotor.cc index 18c4fe3..c29930f 100644 --- a/wiki/wikimotor.cc +++ b/wiki/wikimotor.cc @@ -383,7 +383,7 @@ bool WikiMotorObjVec::splitURL (WikiFormat* wiki, ustring& proto, ustring& host vpath.push_back (WikiMotorObjPtr (new WikiMotorObjText (uSlash))); vec.splitChar ('/', vhost, vpath); host = vhost.textOut (wiki); - if (! checkHostname (host)) { + if (! matchHostname (host)) { wiki->errorMsg.append (host).append (CharConst (": bad hostname.\n")); host = uEmpty; } @@ -400,7 +400,7 @@ bool WikiMotorObjVec::splitURL (WikiFormat* wiki, ustring& url) { ustring proto, host, path, params, anchor; if (splitURL (wiki, proto, host, path, params, anchor)) { - if (checkHostname (host)) { + if (matchHostname (host)) { url.assign (proto).append (CharConst ("://")).append (host).append (path); if (params.length () > 0) url.append (CharConst ("?")).append (params); @@ -436,7 +436,7 @@ bool WikiMotorObjVec::splitURL_2 (WikiFormat* wiki, ustring& host, ustring& pat vpath.push_back (WikiMotorObjPtr (new WikiMotorObjText (uSlash))); fsp = vec.splitChar ('/', vhost, vpath); host = vhost.textOut (wiki); - if (! 
checkHostname (host)) { + if (! matchHostname (host)) { wiki->errorMsg.append (host).append (CharConst (": bad hostname.\n")); host = uEmpty; } @@ -454,7 +454,7 @@ bool WikiMotorObjVec::splitURL_2 (WikiFormat* wiki, const ustring& proto, ustri ustring host, path, params, anchor; if (splitURL_2 (wiki, host, path, params, anchor)) { - if (checkHostname (host)) { + if (matchHostname (host)) { url.assign (proto).append (CharConst ("://")).append (host).append (path); if (params.length () > 0) url.append (CharConst ("?")).append (params); @@ -476,7 +476,7 @@ bool WikiMotorObjVec::splitURL_3 (WikiFormat* wiki, ustring& port, ustring& pat vpath.push_back (WikiMotorObjPtr (new WikiMotorObjText (uSlash))); if (splitChar ('/', vport, vpath)) { ustring v = vport.textOut (wiki); - if (checkNum (v)) { + if (matchNum (v)) { int n = strtoul (v); if (1 <= n && n < 65536) { port = v; diff --git a/wiki/wikimotor.h b/wiki/wikimotor.h index e9d314e..a20d985 100644 --- a/wiki/wikimotor.h +++ b/wiki/wikimotor.h @@ -416,7 +416,7 @@ class WikiMotor { static const int TMATCH_BAR2 = 0x10; WikiFormat* wiki; - Splitter sp; + SplitterRe sp; boost::ptr_vector z; WikiMotor (uiterator b, uiterator e, WikiFormat* w): sp (b, e, re_wiki1) { -- 2.11.0