OSDN Git Service

implement parser for rfc733 format date text.
[bbk/bchan.git] / src / parselib.c
1 /*
2  * parselib.c
3  *
4  * Copyright (c) 2009-2010 project bchan
5  *
6  * This software is provided 'as-is', without any express or implied
7  * warranty. In no event will the authors be held liable for any damages
8  * arising from the use of this software.
9  *
10  * Permission is granted to anyone to use this software for any purpose,
11  * including commercial applications, and to alter it and redistribute it
12  * freely, subject to the following restrictions:
13  *
14  * 1. The origin of this software must not be misrepresented; you must not
15  *    claim that you wrote the original software. If you use this software
16  *    in a product, an acknowledgment in the product documentation would be
17  *    appreciated but is not required.
18  *
19  * 2. Altered source versions must be plainly marked as such, and must not be
20  *    misrepresented as being the original software.
21  *
22  * 3. This notice may not be removed or altered from any source
23  *    distribution.
24  *
25  */
26
27 #include        <basic.h>
28 #include        <bstdlib.h>
29 #include        <bstdio.h>
30 #include        <bstring.h>
31 #include        <bctype.h>
32
33 #include    "parselib.h"
34
/*
 * Debug output helpers.
 *
 * DP(arg)         : arg is a complete, parenthesized printf argument list,
 *                   e.g. DP(("value=%d\n", v));
 * DP_ER(msg, err) : prints msg followed by err shifted right by 16 bits (as
 *                   decimal) and the whole err value (as hexadecimal).
 *
 * Both expand to nothing unless BCHAN_CONFIG_DEBUG is defined, so their
 * arguments must not have side effects the program relies on.
 * The macro parameters are parenthesized so that an expression argument such
 * as "a | b" is shifted as a whole rather than only its last operand.
 */
#ifdef BCHAN_CONFIG_DEBUG
# define DP(arg) printf arg
# define DP_ER(msg, err) printf("%s (%d/%x)\n", (msg), (err)>>16, (err))
#else
# define DP(arg) /**/
# define DP_ER(msg, err) /**/
#endif
42
/* Bits kept in the flag member of tokenchecker_t. */
enum {
        /* No name in the list can match the characters received so far. */
        TOKENCHECKER_FLAG_NOTEXIST = 0x00000001,
        /* An end character has been received; further input is refused. */
        TOKENCHECKER_FLAG_AFTERENDCHAR = 0x00000002
};
45
46 EXPORT VOID tokenchecker_initialize(tokenchecker_t *checker, tokenchecker_valuetuple_t *namelist, W namelistnum, B *endchars)
47 {
48         checker->namelist = namelist;
49         checker->namelistnum = namelistnum;
50         checker->endtokens = endchars;
51         checker->stringindex = 0;
52         checker->listindex_start = 0;
53         checker->listindex_end = checker->namelistnum;
54         checker->flag = 0;
55 }
56
57 EXPORT VOID tokenchecker_clear(tokenchecker_t *checker)
58 {
59         checker->stringindex = 0;
60         checker->listindex_start = 0;
61         checker->listindex_end = checker->namelistnum;
62         checker->flag = 0;
63 }
64
65 EXPORT W tokenchecker_inputchar(tokenchecker_t *checker, UB c, W *val)
66 {
67         W i;
68         tokenchecker_valuetuple_t *namelist = checker->namelist;
69
70         if ((checker->flag & TOKENCHECKER_FLAG_AFTERENDCHAR) != 0) {
71                 return TOKENCHECKER_AFTER_END;
72         }
73
74         for (i = 0;; i++) {
75                 if ((checker->endtokens)[i] == '\0') {
76                         break;
77                 }
78                 if (c == (checker->endtokens)[i]) {
79                         checker->flag |= TOKENCHECKER_FLAG_AFTERENDCHAR;
80                         if ((checker->flag & TOKENCHECKER_FLAG_NOTEXIST) != 0) {
81                                 return TOKENCHECKER_NOMATCH;
82                         }
83                         if ((namelist[checker->listindex_start]).name[checker->stringindex] == '\0') {
84                                 /*List's Name End and receive EndToken = found match string*/
85                                 *val = (namelist[checker->listindex_start]).val;
86                                 return TOKENCHECKER_DETERMINE;
87                         }
88                         /*List's Name continue but receive endtoken.*/
89                         return TOKENCHECKER_NOMATCH;
90                 }
91         }
92
93         if ((checker->flag & TOKENCHECKER_FLAG_NOTEXIST) != 0) {
94                 return TOKENCHECKER_CONTINUE_NOMATCH;
95         }
96
97         for (i = checker->listindex_start; i < checker->listindex_end; i++) {
98                 if ((namelist[i]).name[checker->stringindex] == c) {
99                         break;
100                 }
101         }
102         if (i == checker->listindex_end) { /*receive char is not matched.*/
103                 checker->flag &= TOKENCHECKER_FLAG_NOTEXIST;
104                 return TOKENCHECKER_CONTINUE_NOMATCH;
105         }
106         checker->listindex_start = i;
107         for (i = i+1; i < checker->listindex_end; i++) {
108                 if ((namelist[i]).name[checker->stringindex] != c) {
109                         break;
110                 }
111         }
112         checker->listindex_end = i;
113
114         if ((namelist[checker->listindex_start]).name[checker->stringindex] == '\0') {
115                 /*Don't recive endtoken but List's Name is end.*/
116                 checker->flag |= TOKENCHECKER_FLAG_NOTEXIST;
117                 return TOKENCHECKER_CONTINUE_NOMATCH;
118         }
119         checker->stringindex++;
120
121         return TOKENCHECKER_CONTINUE;
122 }
123
124 EXPORT VOID tokenchecker_getlastmatchedstring(tokenchecker_t *checker, UB **str, W *len)
125 {
126         *str = (checker->namelist[checker->listindex_start]).name;
127         *len = checker->stringindex;
128 }
129
130 LOCAL tokenchecker_valuetuple_t nList_nameref[] = {
131   {"amp", '&'},
132   {"gt", '>'},
133   {"lt", '<'},
134   {"quot", '"'},
135 };
136 LOCAL B eToken_nameref[] = ";";
137
138 LOCAL W charreferparser_digitchartointeger(UB ch)
139 {
140         return ch - '0';
141 }
142
143 LOCAL W charreferparser_hexchartointeger(UB ch)
144 {
145         if(('a' <= ch)&&(ch <= 'h')){
146                 return ch - 'a' + 10;
147         }
148         if(('A' <= ch)&&(ch <= 'H')){
149                 return ch - 'A' + 10;
150         }
151         return charreferparser_digitchartointeger(ch);
152 }
153
154 EXPORT charreferparser_result_t charreferparser_parsechar(charreferparser_t *parser, UB ch)
155 {
156         W ret, val;
157
158         switch (parser->state) {
159         case START:
160                 if (ch != '&') {
161                         return CHARREFERPARSER_RESULT_INVALID;
162                 }
163                 parser->state = RECIEVE_AMP;
164                 return CHARREFERPARSER_RESULT_CONTINUE;
165         case RECIEVE_AMP:
166                 if (ch == '#') {
167                         parser->state = RECIEVE_NUMBER;
168                         return CHARREFERPARSER_RESULT_CONTINUE;
169                 }
170                 if (ch == ';') {
171                         return CHARREFERPARSER_RESULT_INVALID;
172                 }
173                 parser->state = NAMED;
174                 parser->charnumber = -1;
175                 ret = tokenchecker_inputchar(&parser->named, ch, &val);
176                 if (ret != TOKENCHECKER_CONTINUE) {
177                         return CHARREFERPARSER_RESULT_INVALID;
178                 }
179                 return CHARREFERPARSER_RESULT_CONTINUE;
180         case RECIEVE_NUMBER:
181                 if ((ch == 'x')||(ch == 'X')) {
182                         parser->state = NUMERIC_HEXADECIMAL;
183                         return CHARREFERPARSER_RESULT_CONTINUE;
184                 }
185                 if (isdigit(ch)) {
186                         parser->state = NUMERIC_DECIMAL;
187                         parser->charnumber = charreferparser_digitchartointeger(ch);
188                         return CHARREFERPARSER_RESULT_CONTINUE;
189                 }
190                 return CHARREFERPARSER_RESULT_INVALID;
191         case NUMERIC_DECIMAL:
192                 if (isdigit(ch)) {
193                         parser->charnumber *= 10;
194                         parser->charnumber += charreferparser_digitchartointeger(ch);
195                         return CHARREFERPARSER_RESULT_CONTINUE;
196                 }
197                 if (ch == ';') {
198                         parser->state = DETERMINED;
199                         return CHARREFERPARSER_RESULT_DETERMINE;
200                 }
201                 return CHARREFERPARSER_RESULT_INVALID;
202         case NUMERIC_HEXADECIMAL:
203                 if (isxdigit(ch)) {
204                         parser->charnumber *= 16;
205                         parser->charnumber += charreferparser_hexchartointeger(ch);
206                         return CHARREFERPARSER_RESULT_CONTINUE;
207                 }
208                 if (ch == ';') {
209                         parser->state = DETERMINED;
210                         return CHARREFERPARSER_RESULT_DETERMINE;
211                 }
212                 return CHARREFERPARSER_RESULT_INVALID;
213         case NAMED:
214                 ret = tokenchecker_inputchar(&parser->named, ch, &val);
215                 if (ret == TOKENCHECKER_DETERMINE) {
216                         parser->charnumber = val;
217                         parser->state = DETERMINED;
218                         return CHARREFERPARSER_RESULT_DETERMINE;
219                 } else if (ret != TOKENCHECKER_CONTINUE) {
220                         return CHARREFERPARSER_RESULT_INVALID;
221                 }
222                 return CHARREFERPARSER_RESULT_CONTINUE;
223         case INVALID:
224                 return CHARREFERPARSER_RESULT_INVALID;
225         case DETERMINED:
226                 return CHARREFERPARSER_RESULT_INVALID;
227         }
228
229         return CHARREFERPARSER_RESULT_INVALID;
230 }
231
232 EXPORT W charreferparser_getcharnumber(charreferparser_t *parser)
233 {
234         if (parser->state != DETERMINED) {
235                 return -1;
236         }
237         return parser->charnumber;
238 }
239
240 EXPORT VOID charreferparser_getlastmatchedstring(charreferparser_t *parser, UB **str, W *len)
241 {
242         tokenchecker_getlastmatchedstring(&parser->named, str, len);
243 }
244
245 EXPORT VOID charreferparser_resetstate(charreferparser_t *parser)
246 {
247         parser->state = START;
248         parser->charnumber = 0;
249         tokenchecker_clear(&(parser->named));
250 }
251
252 EXPORT W charreferparser_initialize(charreferparser_t *parser)
253 {
254         parser->state = START;
255         parser->charnumber = 0;
256         tokenchecker_initialize(&(parser->named), nList_nameref, 4, eToken_nameref);
257         return 0;
258 }
259
260 EXPORT VOID charreferparser_finalize(charreferparser_t *parser)
261 {
262 }