1 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
\r
7 #include "errormessage.h"
\r
12 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
\r
// Copy constructor: initializes m_type, m_isValueQuoted, m_numericValue,
// m_stringValue and m_data from the corresponding members of i_token.
// NOTE(review): the constructor body is not shown in this listing.
16 Token::Token(const Token &i_token)
\r
17 : m_type(i_token.m_type),
\r
18 m_isValueQuoted(i_token.m_isValueQuoted),
\r
19 m_numericValue(i_token.m_numericValue),
\r
20 m_stringValue(i_token.m_stringValue),
\r
21 m_data(i_token.m_data)
\r
// Constructs an unquoted Type_number token.
//   i_value   - stored as the numeric value
//   i_display - stored as the string value (the text written by operator<<)
// NOTE(review): the initializer list continues on lines not shown in this
// listing (m_data is presumably initialized there - confirm).
25 Token::Token(int i_value, const tstringi &i_display)
\r
26 : m_type(Type_number),
\r
27 m_isValueQuoted(false),
\r
28 m_numericValue(i_value),
\r
29 m_stringValue(i_display),
\r
// Constructs a string-like token.
//   i_value         - stored as the string value
//   i_isValueQuoted - recorded in m_isValueQuoted
//   i_isRegexp      - selects Type_regexp when true, Type_string otherwise
// NOTE(review): some initializers (between embedded lines 36 and 38, and
// after 38) are not shown in this listing.
34 Token::Token(const tstringi &i_value, bool i_isValueQuoted, bool i_isRegexp)
\r
35 : m_type(i_isRegexp ? Type_regexp : Type_string),
\r
36 m_isValueQuoted(i_isValueQuoted),
\r
38 m_stringValue(i_value),
\r
// Constructs a punctuation token: unquoted, with an empty string value.
// The ASSERT restricts the resulting m_type to Type_openParen,
// Type_closeParen or Type_comma.
// NOTE(review): the initializer that sets m_type from i_m_type is on a line
// not shown in this listing.
43 Token::Token(Type i_m_type)
\r
45 m_isValueQuoted(false),
\r
47 m_stringValue(_T("")),
\r
50 ASSERT(m_type == Type_openParen || m_type == Type_closeParen ||
\r
51 m_type == Type_comma);
\r
54 // get numeric value
// Returns m_numericValue when the token is a Type_number.
// A non-number token with an empty string value is special-cased; the
// statement executed for it is on lines not shown in this listing
// (presumably it yields 0 - TODO confirm).
// Any other token throws an ErrorMessage naming the token.
\r
55 int Token::getNumber() const
\r
57 if (m_type == Type_number)
\r
58 return m_numericValue;
\r
59 if (m_stringValue.empty())
\r
62 throw ErrorMessage() << _T("`") << *this << _T("' is not a Type_number.");
\r
// Returns the string value of a Type_string token.
// Throws an ErrorMessage naming the token when it is any other type.
66 tstringi Token::getString() const
\r
68 if (m_type == Type_string)
\r
69 return m_stringValue;
\r
70 throw ErrorMessage() << _T("`") << *this << _T("' is not a string.");
\r
// Returns the pattern text (m_stringValue) of a Type_regexp token.
// Throws an ErrorMessage naming the token when it is any other type.
74 tstringi Token::getRegexp() const
\r
76 if (m_type == Type_regexp)
\r
77 return m_stringValue;
\r
78 throw ErrorMessage() << _T("`") << *this << _T("' is not a regexp.");
\r
81 // case insensitive equal
// For a Type_string token, compares m_stringValue with i_str.
// NOTE(review): case-insensitivity presumably comes from tstringi's own
// operator== - confirm. The result for non-string tokens is on a line not
// shown in this listing.
\r
82 bool Token::operator==(const _TCHAR *i_str) const
\r
84 if (m_type == Type_string)
\r
85 return m_stringValue == i_str;
\r
// paren equal
// Compares the token with a parenthesis character:
//   '(' matches a Type_openParen token,
//   ')' matches a Type_closeParen token.
// Fix: the ')' branch previously tested Type_openParen (copy-paste error),
// so `token == _T(')')` was true for open parens and never for close parens.
// NOTE(review): the result for any other i_c is on a line not shown in this
// listing.
90 bool Token::operator==(const _TCHAR i_c) const
\r
92 if (i_c == _T('(')) return m_type == Type_openParen;
\r
93 if (i_c == _T(')')) return m_type == Type_closeParen;
\r
// Appends i_str to this token's string value.
98 void Token::add(const tstringi &i_str)
\r
100 m_stringValue += i_str;
\r
// Stream output for a Token, dispatching on i_token.m_type.
// String, number and regexp tokens all write m_stringValue (for numbers
// this is the display text passed to the constructor).
// NOTE(review): the statements for the paren/comma cases, the `break`
// lines and the return statement are not shown in this listing.
104 tostream &operator<<(tostream &i_ost, const Token &i_token)
\r
106 switch (i_token.m_type) {
\r
107 case Token::Type_string:
\r
108 i_ost << i_token.m_stringValue;
\r
110 case Token::Type_number:
\r
111 i_ost << i_token.m_stringValue;
\r
113 case Token::Type_regexp:
\r
114 i_ost << i_token.m_stringValue;
\r
116 case Token::Type_openParen:
\r
119 case Token::Type_closeParen:
\r
122 case Token::Type_comma:
\r
130 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
\r
// Constructs a parser over the character buffer [i_str, i_str + i_length).
// The internal line counter starts at 1 and m_end points one past the last
// character.
// NOTE(review): the remaining initializers are on lines not shown in this
// listing. The buffer is referenced by pointer here, so it presumably must
// outlive the parser - confirm.
134 Parser::Parser(const _TCHAR *i_str, size_t i_length)
\r
137 m_internalLineNumber(1),
\r
139 m_end(i_str + i_length)
\r
143 // set strings that may be prefixes of a token.
\r
144 // i_prefixes is not copied (only the pointer is stored), so it must remain valid after setPrefixes()
\r
145 void Parser::setPrefixes(const Prefixes *i_prefixes)
\r
147 m_prefixes = i_prefixes;
\r
// Reads the next raw line from the buffer [m_ptr, m_end) into *o_line and
// increments m_internalLineNumber.
// The m_ptr == m_end check handles an exhausted buffer (its return
// statement is on a line not shown - presumably false).
// NOTE(review): most of the line-terminator switch is missing from this
// listing; only the look-ahead for '\n' (the "\r\n" pair) is visible.
151 bool Parser::getLine(tstringi *o_line)
\r
155 if (m_ptr == m_end)
\r
158 const _TCHAR *begin = m_ptr;
\r
159 const _TCHAR *end = m_end;
\r
161 // lines are separated by: "\r\n", "\n", "\x2028" (Unicode Line Separator)
\r
162 while (m_ptr != m_end)
\r
167 //case _T('\x2028'): // (U+2028)
\r
173 if (m_ptr + 1 != m_end && m_ptr[1] == _T('\n')) {
\r
184 ++ m_internalLineNumber;
\r
185 // o_line->assign(begin, end); // why bcc cannot link this ?
\r
186 o_line->assign(begin, end - begin); // workaround for bcc
\r
// Classifies i_c as a character that may appear in an unquoted symbol token
// (used by the symbol-scanning loop in Parser::getLine).
// Visible rules: among punctuation characters only - + / ? _ \ are
// accepted; otherwise the result falls back to _istgraph(i_c).
// NOTE(review): the return statements for the NUL, lead-byte, >= 0x80 and
// arrow-character (U+2190..U+2193) branches are on lines not shown in this
// listing, so their results cannot be stated from here.
191 static bool isSymbolChar(_TCHAR i_c)
\r
193 if (i_c == _T('\0'))
\r
195 if (_istlead(i_c) ||
\r
202 if (0x80 <= i_c && _istgraph(i_c))
\r
206 if (_istpunct(i_c))
\r
207 return !!_tcschr(_T("-+/?_\\"), i_c);
\r
211 if (_tcschr(_T("\x2190\x2191\x2192\x2193"), i_c)) {
\r
215 return _istgraph(i_c);
\r
219 // get a parsed line.
\r
220 // if no more lines exist, returns false
// Tokenizes raw lines into *o_tokens and returns whether any token was
// produced (0 < o_tokens->size()).
// Visible per-line scanning steps:
//   - skip white space; end of line or '#' ends the token loop
//   - a backslash as the last character continues onto the next raw line
//   - ',' '(' ')' become punctuation tokens; empty string tokens are pushed
//     around ',' and ')' (the guarding conditions are on lines not shown)
//   - a match against one of m_prefixes (via _tcsnicmp) becomes its own
//     token
//   - text starting with '"', '\'', '/' or "\m" is a quoted string / regexp
//   - otherwise a run of isSymbolChar() characters is a number token if
//     _tcstol consumes any of it, else an unquoted string token
// NOTE(review): many lines of this function are missing from the listing
// (embedded line numbers skip); comments below describe only what is
// visible.
\r
221 bool Parser::getLine(std::vector<Token> *o_tokens)
\r
224 m_lineNumber = m_internalLineNumber;
\r
227 bool isTokenExist = false;
\r
228 continue_getLineLoop:
\r
229 while (getLine(&line)) {
\r
230 const _TCHAR *t = line.c_str();
\r
232 continue_getTokenLoop:
\r
234 // skip white space
\r
235 while (*t != _T('\0') && _istspace(*t))
\r
237 if (*t == _T('\0') || *t == _T('#'))
\r
238 goto break_getTokenLoop; // no more tokens exist
\r
239 if (*t == _T('\\') && *(t + 1) == _T('\0'))
\r
240 goto continue_getLineLoop; // continue to next line
\r
242 const _TCHAR *tokenStart = t;
\r
244 // comma or empty token
\r
245 if (*t == _T(',')) {
\r
247 o_tokens->push_back(Token(_T(""), false));
\r
248 isTokenExist = false;
\r
249 o_tokens->push_back(Token(Token::Type_comma));
\r
251 goto continue_getTokenLoop;
\r
255 if (*t == _T('(')) {
\r
256 o_tokens->push_back(Token(Token::Type_openParen));
\r
257 isTokenExist = false;
\r
259 goto continue_getTokenLoop;
\r
261 if (*t == _T(')')) {
\r
263 o_tokens->push_back(Token(_T(""), false));
\r
264 isTokenExist = true;
\r
265 o_tokens->push_back(Token(Token::Type_closeParen));
\r
267 goto continue_getTokenLoop;
\r
270 isTokenExist = true;
\r
// prefix match: compared with _tcsnicmp over the prefix's length
274 for (size_t i = 0; i < m_prefixes->size(); i ++)
\r
275 if (_tcsnicmp(tokenStart, m_prefixes->at(i).c_str(),
\r
276 m_prefixes->at(i).size()) == 0) {
\r
277 o_tokens->push_back(Token(m_prefixes->at(i), false));
\r
278 t += m_prefixes->at(i).size();
\r
279 goto continue_getTokenLoop;
\r
282 // quoted or regexp
\r
283 if (*t == _T('"') || *t == _T('\'') ||
\r
284 *t == _T('/') || (*t == _T('\\') && *(t + 1) == _T('m') &&
\r
285 *(t + 2) != _T('\0'))) {
\r
286 bool isRegexp = !(*t == _T('"') || *t == _T('\''));
\r
287 _TCHAR q[2] = { *t++, _T('\0') }; // quote character
\r
288 if (q[0] == _T('\\')) {
\r
// scan to the closing quote character (or end of line)
294 while (*t != _T('\0') && *t != q[0]) {
\r
295 if (*t == _T('\\') && *(t + 1))
\r
297 if (_istlead(*t) && *(t + 1))
\r
303 interpretMetaCharacters(tokenStart, t - tokenStart, q, isRegexp);
\r
306 str = guardRegexpFromMbcs(str.c_str());
\r
308 // concatenate consecutive quoted strings
\r
310 0 < o_tokens->size() && o_tokens->back().isString() &&
\r
311 o_tokens->back().isQuoted())
\r
312 o_tokens->back().add(str);
\r
314 o_tokens->push_back(Token(str, true, isRegexp));
\r
315 if (*t != _T('\0'))
\r
317 goto continue_getTokenLoop;
\r
// unquoted token: consume a run of symbol characters
322 while (isSymbolChar(*t)) {
\r
323 if (*t == _T('\\'))
\r
328 if (_istlead(*t) && *(t + 1))
\r
// nothing consumed: *t is not a valid token character, report it
332 if (t == tokenStart) {
\r
334 e << _T("invalid character ");
\r
337 e << std::hex; // << std::setw(4) << std::setfill(_T('0'));
\r
338 e << (int)(wchar_t)*t;
\r
341 e << std::hex; // << std::setw(2) << std::setfill(_T('0'));
\r
342 e << (int)(u_char)*t;
\r
346 e << _T("(") << *t << _T(")");
\r
// base 0 lets _tcstol choose the radix from the text's prefix
350 _TCHAR *numEnd = NULL;
\r
351 long value = _tcstol(tokenStart, &numEnd, 0);
\r
352 if (tokenStart == numEnd) {
\r
353 tstring str = interpretMetaCharacters(tokenStart, t - tokenStart);
\r
354 o_tokens->push_back(Token(str, false));
\r
356 o_tokens->push_back(
\r
357 Token(value, tstringi(tokenStart, numEnd - tokenStart)));
\r
360 goto continue_getTokenLoop;
\r
363 break_getTokenLoop:
\r
364 if (0 < o_tokens->size())
\r
366 m_lineNumber = m_internalLineNumber;
\r
367 isTokenExist = false;
\r
370 return 0 < o_tokens->size();
\r