-- Lexerの実装を継続している。

author derui <derutakayu@user.sourceforge.jp>

Fri, 19 Jun 2009 16:07:40 +0000 (01:07 +0900)

committer derui <derutakayu@user.sourceforge.jp>

Fri, 19 Jun 2009 16:07:40 +0000 (01:07 +0900)
author derui <derutakayu@user.sourceforge.jp>
Fri, 19 Jun 2009 16:07:40 +0000 (01:07 +0900)
committer derui <derutakayu@user.sourceforge.jp>
Fri, 19 Jun 2009 16:07:40 +0000 (01:07 +0900)
diff --git a/sublexer_impl.cpp b/sublexer_impl.cpp

index 947b41c..2d13826 100644 (file)
--- a/sublexer_impl.cpp
+++ b/sublexer_impl.cpp
@@ -52,10 +52,10 @@ smart_ptr<lexeme::ILexeme> sublexer::FirstLexer::lex(smart_ptr<utf8::UTF8InputSt
          if (lexer_delimiter::Normal()(UTF8Char(stream->peek())))
          {
              return lexeme::makeDot();
-        } else {
-            // デリミタでない場合、これはエラーとして扱われる。
-            throw sublexer::LexException(stream->pos(), ".はデリミタで区切られなければなりません");
-        }
+        }//  else {
+//             // デリミタでない場合、これはエラーとして扱われる。
+//             throw sublexer::LexException(stream->pos(), ".はデリミタで区切られなければなりません");
+//         }
      }
      else if (utf8_string::is_numeric(ch))
      {
@@ -70,7 +70,8 @@ smart_ptr<lexeme::ILexeme> sublexer::FirstLexer::lex(smart_ptr<utf8::UTF8InputSt
               ch.toUTF16Code() == ':' || ch.toUTF16Code() == '<' ||
               ch.toUTF16Code() == '>' || ch.toUTF16Code() == '?' ||
               ch.toUTF16Code() == '^' || ch.toUTF16Code() == '_' ||
-             ch.toUTF16Code() == '~')
+             ch.toUTF16Code() == '~' || ch.toUTF16Code() == '+' ||
+             ch.toUTF16Code() == '.')
      {
          // やたらと多いが、上記のどれかである場合には、identityとして解析を
          // 開始させる。
@@ -177,6 +178,21 @@ smart_ptr<lexeme::ILexeme> sublexer::FirstLexer::lex_(const utakata::utf8_string
              next.add(new sublexer::CharactorLexer());
          }
      }
+    else if (str[0].toUTF16Code() == '-')
+    {
+        // 先頭が-の場合、次の文字を見てから決める。
+        // 基本的には数値だが、次の文字によってはidentifierになりうる。
+        UTF8Char ch(stream->peek());
+        lexer_delimiter::Normal nor;
+        if (ch.toUTF16Code() == '>' || nor(ch))
+        {
+            next.add(new sublexer::IdentifierLexer(str));
+        }
+        else
+        {
+            next.add(new sublexer::NumberLexer(str));
+        }
+    }
  
      return smart_ptr<lexeme::ILexeme>();
  }
@@ -211,6 +227,37 @@ smart_ptr<lexeme::ILexeme> sublexer::NumberLexer::lex(smart_ptr<utf8::UTF8InputS
  smart_ptr<lexeme::ILexeme> sublexer::IdentifierLexer::lex(smart_ptr<utf8::UTF8InputStream> stream,
                                                            smart_ptr<sublexer::ISubLexer>& next)
  {
+    // Lex an identifier. The rules themselves are fairly simple.
+    // NOTE(review): this patch declares a str_ member as smart_ptr<UTF8String> in sublexer_impl.h; if that is this class's member, confirm the reference binding of str below.
+    // The first character has already been examined, so decisions are made from the second character on.
+    // Some cases cannot be decided without the second character or later, so they are handled together here.
+    lexer_delimiter::Normal nor;
+    UTF8String& str = str_;
+    if (str[0].toUTF16Code() == '+')
+    {
+        return makePlusIdentifier();
+    }
+    else if (str[0].toUTF16Code() == '-')
+    {
+        // If the next character is a delimiter, return the "-" identifier as-is. (The local 'next' shadows the 'next' parameter inside this block.)
+        UTF8Char next(stream->peek());
+        if (nor(next))
+        {
+            return makeMinusIdentifier();
+        }
+        else if (next.toUTF16Code() == '>')
+        {
+            // "-" followed by ">": nothing special to do; simply continue with the loop below.
+        }
+    }
+    // Append characters to str until EOF or until nor() accepts the peeked character. NOTE(review): nothing is built from str afterwards; an empty smart_ptr is returned.
+    while (!stream->isEOF()) {
+        UTF8Char tmp(stream->peek());
+        if (nor(tmp)) {
+            break;
+        }
+        str += stream->read();
+    }
      return smart_ptr<lexeme::ILexeme>();
  }
  
@@ -222,53 +269,30 @@ smart_ptr<lexeme::ILexeme> sublexer::CharactorLexer::lex(smart_ptr<utf8::UTF8Inp
      // #\に続く文字名、あるいは#\xに続く16進数のチェックを行なう。
      // ここに来た時点で、#\までは読まれている。
      UTF8Char ch(stream->read());
+    bool flag = ch.toUTF16Code() == 'x' ? true : false;
  
-    if (ch.toUTF16Code() == 'x')
-    {
-        // Hexを解釈する。解釈部分を書くと面倒になるため、単純な別関数にしておく。
-        UTF8String str(ch.getBytes());
-        lexer_delimiter::Normal nor;
-        lexer_delimiter::HexValue hex;
-        while (!stream->isEOF()) {
-            UTF8Char tmp(stream->peek());
-            if (nor(tmp))
-            {
-                break;
-            }
-            else if (!hex(tmp))
-            {
-                // hex valueではなかった場合には、これはエラーであると
-                // して返す。
-                std::stringstream ss;
-                ss << tmp.toStr() << "は16進数中の文字名として利用できません";
-                throw sublexer::LexException(stream->pos(), ss.str());
-            }
-            str += stream->read();
+    // Hexを解釈する。解釈部分を書くと面倒になるため、単純な別関数にしておく。
+    UTF8String str(ch.getBytes());
+    lexer_delimiter::Normal nor;
+    lexer_delimiter::HexValue hex;
+    while (!stream->isEOF()) {
+        UTF8Char tmp(stream->peek());
+        if (nor(tmp))
+        {
+            break;
          }
-
-        return makeHexCharactor(tmp);
-    }
-    else
-    {
-        // 文字名として解釈する。
-        // デリミタで区切られなければならないため、デリミタまでを取得する。
-        // なお空白文字はchに格納されているため、ここでは問題にはならない。
-        UTF8String str(ch.getBytes());
-        lexer_delimiter::Normal nor;
-        while (!stream->isEOF()) {
-            UTF8Char tmp(stream->peek());
-            if (nor(tmp))
-            {
-                // この時点で抜ける。
-                break;
-            }
-            str += stream->read();
+        else if (flag && !hex(tmp))
+        {
+            // hex valueではなかった場合には、これはエラーであると
+            // して返す。
+            std::stringstream ss;
+            ss << tmp.toStr() << "は16進数中の文字名として利用できません";
+            throw sublexer::LexException(stream->pos(), ss.str());
          }
-        // 実際に但しいかどうかはここでは検証しない。長いし。
-        return makeCharactor(str);
+        str += stream->read();
      }
-        
-    return smart_ptr<lexeme::ILexeme>();
+
+    return makeCharactor(tmp);
  }
  
  //================================================================================
diff --git a/sublexer_impl.h b/sublexer_impl.h

index 6b310c7..923b650 100644 (file)
--- a/sublexer_impl.h
+++ b/sublexer_impl.h
@@ -127,6 +127,10 @@ namespace utakata {
  
              smart_ptr<lexeme::ILexeme> lex(smart_ptr<utakata::utf8::UTF8InputStream> stream,
                                             smart_ptr<ISubLexer>& next);
+
+        private:
+
+            smart_ptr<utakata::utf8_string::UTF8String> str_;
          };
  
      };
author	derui <derutakayu@user.sourceforge.jp>
	Fri, 19 Jun 2009 16:07:40 +0000 (01:07 +0900)
committer	derui <derutakayu@user.sourceforge.jp>
	Fri, 19 Jun 2009 16:07:40 +0000 (01:07 +0900)
sublexer_impl.cpp		patch \| blob \| history
sublexer_impl.h		patch \| blob \| history