2 #include "src/exception_macro.h"
3 #include "src/lexer/string_lexer.h"
4 #include "src/lexer/term_lexer.h"
5 #include "src/lexer/term_checker.h"
6 #include "src/lexer/inline_hex_escape_lexer.h"
7 #include "src/lexeme.h"
8 #include "src/encoding_reader.h"
9 #include "src/unicode.h"
11 namespace lexer = utakata::lexer;
12 namespace term = utakata::lexer::term;
13 namespace reader = utakata::reader;
14 namespace unicode = utakata::unicode;
// Maps the character that follows the backslash of an escape sequence to
// the constant for the character it denotes (the letter n yields kLinefeed,
// the letter t yields kTab, and so on for each case listed below).
//
// code: the value to translate; the caller further down in this file passes
//       escape.At(1).rawcode().
// Returns the constant named in the matching case.
// NOTE(review): the embedded numbering skips 18 and 28-32, so the switch
// header, any default branch and the closing braces are not visible in this
// excerpt; the result for an unlisted value cannot be confirmed from here.
17 unsigned int lexer::EscapeConverter::Convert(unsigned int code) {
19 case 'a' : return kAlarm;
20 case 'b' : return kBackspace;
21 case 't' : return kTab;
22 case 'n' : return kLinefeed;
23 case 'v' : return kVTab;
// NOTE(review): presumably kForward stands for form feed -- confirm against
// the definition of the constant.
24 case 'f' : return kForward;
// The identifier really is spelled kCaridgeReturn; it is declared elsewhere.
25 case 'r' : return kCaridgeReturn;
26 case '"' : return kDoubleQuote;
27 case '\\' : return kBackslash;
// Lexes one escape sequence from reader and stores the resulting code
// through the code argument.
//
// Visible steps:
//   1. Read kEscapeValidLength characters and check that the first one is a
//      backslash (the handling of a failed check is on lines 37-39, which
//      are not visible in this excerpt).
//   2. Raise LexException when fewer than kEscapeValidLength characters
//      could be read.
//   3. Try the inline hex escape lexer first.
//   4. Otherwise require that the second character is convertible by
//      EscapeConverter, or that the reader is positioned at intraline
//      whitespace or a line ending; raise LexException if none holds.
//   5. Store the result of ConvertEscapeToCode.
// NOTE(review): the second parameter (line 34) and the return statements are
// not visible here, so the meaning of the bool result cannot be confirmed.
33 bool lexer::EscapeLexer::Lex(reader::EncodingReader* reader,
35 unicode::UniString escape(reader->Read(kEscapeValidLength));
// NOTE(review): rawcode is written without call parentheses here but is
// called as rawcode() on lines 55, 76 and 77 -- confirm which form is right.
36 if (escape.IsEmpty() || escape.At(0).rawcode != '\\') {
40 if (escape.GetSize() < kEscapeValidLength) {
41 THROW_EXCEPTION_(lexer::LexException,
42 unicode::Convert("invalid escape sequence"));
45 lexer::InlineHexEscapeLexer inline_escape;
// NOTE(review): this passes &code while line 62 writes through *code; the
// declaration of code is not visible, so confirm the two uses agree in type.
47 if (inline_escape(reader, &code)) {
52 lexer::TermLexer<term::IntralineWhiteSpaceChecker> intraline;
53 lexer::TermLexer<term::LineEndingChecker> lineend;
// NOTE(review): the member access below is applied to the type name
// EscapeConverter, whereas ConvertEscapeToCode creates a converter object
// before calling CanConvert -- confirm this line compiles as intended.
55 if (!EscapeConverter.CanConvert(escape.At(1).rawcode()) &&
56 !intraline.CheckToken(reader) && !lineend.CheckToken(reader)) {
// NOTE(review): the literal is passed straight to UniString here, while the
// other messages in this file go through unicode::Convert first.
57 unicode::UniString encoded_string("invalid escape sequence : ");
58 encoded_string.Append(escape);
59 THROW_EXCEPTION_(lexer::LexException, encoded_string);
62 *code = ConvertEscapeToCode(escape, reader);
// Turns an escape that has already been read into the code it stands for.
//
// escape: the characters read by the caller; the character at index 1 is
//         the one examined here.
// reader: used for lookahead via CheckToken and handed to ReadIntralines.
// Raises LexException when the character at index 1 is not convertible.
// NOTE(review): lines 78, 83-84 and the return are not visible in this
// excerpt; presumably line 78 is the else of the CanConvert test and the
// ReadIntralines call sits in the else of the outer test -- confirm.
66 // When the escape is of the \<intraline whitespace> form, the returned code is 0 ... (the rest of this sentence is on a line not visible here)
68 unsigned int lexer::EscapeLexer::ConvertEscapeToCode(
69 const unicode::UniString& escape, reader::EncodingReader* reader) {
// Stays 0 unless the converter branch below assigns it.
70 unsigned int code = 0;
71 lexer::TermLexer<term::IntralineWhiteSpaceChecker> intraline;
72 lexer::TermLexer<term::LineEndingChecker> lineend;
// Taken when the reader is at neither intraline whitespace nor a line ending.
74 if (!intraline.CheckToken(reader) && !lineend.CheckToken(reader)) {
75 lexer::EscapeConverter converter;
76 if (converter.CanConvert(escape.At(1).rawcode())) {
77 code = converter.Convert(escape.At(1).rawcode());
// Build the error message with the offending escape appended, then raise.
79 unicode::UniString encoded_string(
80 unicode::Convert("invalid escape sequence : "));
81 encoded_string.Append(escape);
82 THROW_EXCEPTION_(lexer::LexException, encoded_string);
85 ReadIntralines(reader);
// Consumes the whitespace / line ending / whitespace run that follows a
// backslash, as far as the visible lines show:
//   1. skip intraline whitespace until a non-matching token or end of input,
//   2. require a line ending at that point, raising LexException otherwise,
//   3. consume the line ending,
//   4. skip intraline whitespace again (the start of the following line).
// NOTE(review): the single-character Read that the comment below refers to
// would be on lines 96-97, which are not visible in this excerpt.
91 // The reason exactly one character is Read at the start of this routine is that the \ and the character after it have already ... (the rest of this sentence is on a line not visible here)
93 void lexer::EscapeLexer::ReadIntralines(reader::EncodingReader* reader) {
94 lexer::TermLexer<term::IntralineWhiteSpaceChecker> intraline;
95 lexer::TermLexer<term::LineEndingChecker> lineend;
98 while (intraline.CheckToken(reader) && !reader->IsEof()) {
99 intraline.ReadToken(reader);
102 if (!lineend.CheckToken(reader)) {
// NOTE(review): encoded_string is declared but not used on any visible line.
103 unicode::UniString encoded_string;
104 THROW_EXCEPTION_(lexer::LexException,
105 unicode::Convert("must line ending after whitespaces"));
107 lineend.ReadToken(reader);
109 while (intraline.CheckToken(reader) && !reader->IsEof()) {
110 intraline.ReadToken(reader);
// Lexes a string literal from reader and returns it as a Lexeme of kind
// kString.
//
// Visible steps: read the first character and test it with the string
// delimiter checker; then loop, peeking one character at a time, until a
// delimiter is seen or the reader is exhausted, appending read characters
// to the result; raise LexException with the not-found message otherwise.
// NOTE(review): lines 125-128, 140-144, 147-149 and 151-154 are not visible
// in this excerpt, so the handling of a non-delimiter head, of the closing
// delimiter and of backslash escapes cannot be confirmed from here.
114 // A string ends either at a double quote on the same line, or at the matching double quote several lines later when the string contains \\ continuations.
115 // If the end of the reader is reached without a matching double quote, a LexException is ... (sentence continues on a line not visible here)
117 // Also, when only whitespace follows a \ up to the end of the line, and everything up to the first character of the next line is only line breaks or
118 // whitespace, that whitespace and those line breaks are ignored and the string is considered to continue ... (sentence continues on a line not visible here)
120 lexer::Lexeme* lexer::StringLexer::Lex(reader::EncodingReader* reader) {
121 term::StringDelimiterChecker string_delimiter;
123 unicode::UniChar head(reader->Read());
124 if (!string_delimiter(head)) {
// The type name below really is spelled StarndardDelimiterChecker; it is
// declared elsewhere.
129 term::StarndardDelimiterChecker std_delimiter;
130 term::WhitespaceChecker white_delimiter;
131 term::LineEndingChecker lineending_checker;
133 unicode::UniString str;
// Loop exit flag; presumably set once the closing delimiter is found -- the
// assignment is not on a visible line.
134 bool syntax_ok = false;
136 while (!reader->IsEof() && !syntax_ok) {
137 unicode::UniChar tmp(reader->Peek());
139 if (string_delimiter(tmp)) {
145 if (lineending_checker(tmp)) {
// NOTE(review): this inner loop tests tmp, which was peeked before the loop;
// confirm that the hidden lines 147-148 refresh tmp, otherwise the condition
// would only change when the reader reaches its end.
146 while (!reader->IsEof() && (white_delimiter(tmp) || std_delimiter(tmp))) {
150 str.Append(unicode::UniChar(reader->Read()));
155 THROW_EXCEPTION_(lexer::LexException,
156 unicode::Convert("not found end of string."));
// NOTE(review): the Lexeme is allocated with new and returned as a raw
// pointer; presumably the caller takes ownership -- confirm against callers.
159 return new lexer::Lexeme(str, lexer::Lexeme::kString);