sublexer_impl.cpp

   1 #include <iostream>
   2 #include <assert.h>
   3
   4 #include "utf8_string.h"
   5 #include "utf8.h"
   6
   7 #include "sublexer_impl.h"
   8 #include "lexeme_impl.h"
   9
  10 using namespace utakata;
  11
  12 smart_ptr<sublexer::ISubLexer> sublexer::CFirstLex::lex(const utf8_string::CUTF8Char& ch,
  13                                                         const utf8_string::CUTF8String& str,
  14                                                         smart_ptr<utf8::CUTF8InputStream>& stream)
  15 {
  16     // chにはlexerから渡された、今回読みだした文字が渡されている。
  17
  18     smart_ptr<sublexer::ISubLexer> ret;
  19     // ()[]`',.は構文解析の重要な要素となるため、このまま返す。
  20     if (ch.toUTF16Code() == '(' || ch.toUTF16Code() == '[')
  21     {
  22         lexeme_ = lexeme::makeOpenParen();
  23     }
  24
  25     if (ch.toUTF16Code() == ')' || ch.toUTF16Code() == ']')
  26     {
  27         lexeme_ = lexeme::makeCloseParen();
  28     }
  29
  30     if (ch.toUTF16Code() == '`')
  31     {
  32         lexeme_ = lexeme::makeBackQuote();
  33     }
  34
  35     if (ch.toUTF16Code() == '\'')
  36     {
  37         lexeme_ = lexeme::makeQuote();
  38     }
  39
  40     if (ch.toUTF16Code() == '.')
  41     {
  42         lexeme_ = lexeme::makeDot();
  43     }
  44
  45     if (ch.toUTF16Code() == ',' || ch.toUTF16Code() == '#')
  46     {
  47         // ,や#の場合、次に別の文字が続く可能性があるため、NextLexemeLexerを
  48         // 呼び出すようにしておく。
  49         ret.add(new sublexer::CNextLexemeLexer());
  50     }
  51
  52     if (ch.toUTF16Code() == '"')
  53     {
  54         // 先頭が"の場合、stringと判断される。
  55         ret.add(new sublexer::CStringLexer());
  56     }
  57
  58     // 基本的に判定できないことはありえない。
  59     return ret;
  60 }
  61
  62 smart_ptr<lexeme::ILexeme> sublexer::CFirstLex::getLexeme()
  63 {
  64     return smart_ptr<lexeme::ILexeme>();
  65 }
  66
  67 //================================================================================
  68
  69 smart_ptr<sublexer::ISubLexer> sublexer::CStringLexer::lex(
  70     const utf8_string::CUTF8Char& ch,
  71     const utf8_string::CUTF8String& str,
  72     smart_ptr<utf8::CUTF8InputStream>& stream
  73     )
  74 {
  75     return smart_ptr<ISubLexer>();
  76 }
  77
  78 smart_ptr<lexeme::ILexeme> sublexer::CStringLexer::getLexeme()
  79 {
  80     return lexeme_;
  81 }
  82
  83
  84 //================================================================================
  85
  86 smart_ptr<sublexer::ISubLexer> sublexer::CNextLexemeLexer::lex(
  87     const utf8_string::CUTF8Char& ch,
  88     const utf8_string::CUTF8String& str,
  89     smart_ptr<utf8::CUTF8InputStream>& stream
  90     )
  91 {
  92
  93     smart_ptr<sublexer::ISubLexer> ret;
  94     // 二文字目が特殊であるようなデータを判別するためのlexer。
  95     if (str.size() == 1)
  96     {
  97         return checkSecondChar(ch, str, stream);
  98     }
  99
 100
 101     return smart_ptr<ISubLexer>();
 102 }
 103
 104 smart_ptr<lexeme::ILexeme> sublexer::CNextLexemeLexer::getLexeme()
 105 {
 106     return lexeme_;
 107 }
 108
 109 smart_ptr<sublexer::ISubLexer> sublexer::CNextLexemeLexer::checkSecondChar(
 110     const utf8_string::CUTF8Char& ch,
 111     const utf8_string::CUTF8String& str,
 112     smart_ptr<utf8::CUTF8InputStream>& stream
 113     )
 114 {
 115     smart_ptr<sublexer::ISubLexer> ret;
 116
 117     if (str[0].toUTF16Code() == '#')
 118     {
 119         // 先頭が#の場合。
 120         if (ch.toUTF16Code() == '\'')
 121         {
 122             lexeme_ = lexeme::makeQuasiQuote();
 123         }
 124         else if (ch.toUTF16Code() == '`')
 125         {
 126             lexeme_ = lexeme::makeQuasiBackQuote();
 127         }
 128         else if (ch.toUTF16Code() == ',')
 129         {
 130             // これもまた次の文字で確定するため、さらに次の文字をチェッ
 131             // クする。
 132             ret.add(new CNextLexemeLexer());
 133         }
 134         else if (ch.toUTF16Code() == '(')
 135         {
 136             lexeme_ = lexeme::makeByteVectorOpen();
 137         }
 138         else if (ch.toUTF16Code() == '!')
 139         {
 140             // detumコメント(こう呼んでおく)の開始
 141             lexeme_ = lexeme::makeStartDetumComment();
 142         }
 143         else if (ch.toUTF16Code() == '|')
 144         {
 145             // ネスト可能コメントの開始
 146             lexeme_ = lexeme::makeStartNestedComment();
 147         }
 148         else if (ch.toUTF16Code() == 't' || ch.toUTF16Code() == 'T')
 149         {
 150             // true
 151             lexeme_ = lexeme::makeTrue();
 152         }
 153         else if (ch.toUTF16Code() == 'f' || ch.toUTF16Code() == 'F')
 154         {
 155             // false
 156             lexeme_ = lexeme::makeFalse();
 157         }
 158         else if (ch.toUTF16Code() == '\\')
 159         {
 160             // 文字名かhexかどうかの判別のため、さらに次の文字を判定す
 161             // る必要がある。
 162             ret.add(new CNextLexemeLexer());
 163         }
 164     }
 165     else if (str[0].toUTF16Code() == '|')
 166     {
 167         // 一文字目がこれの場合には、ネストコメントの末尾しかない。
 168         if (ch.toUTF16Code() == '#')
 169         {
 170             lexeme_ = lexeme::makeEndNextedComment();
 171         } else {
 172             // ネストコメント以外というのはとりあえずありえないため、
 173             // ここで不正であるトークンを返す。
 174             lexeme_ = lexeme::makeIllegalToken(str + ch);
 175         }
 176     }
 177
 178     return ret;
 179 }