2 * Copyright (C) 2009 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "LiteralParser.h"
32 #include "UStringBuilder.h"
33 #include <wtf/ASCIICType.h>
// Lexer::lex: scans the input at m_ptr and describes the token found there in
// `token` (type, start, end).
// NOTE(review): this listing omits lines (the switch/case labels, braces,
// pointer advances and return statements are not visible), so the comments
// below describe only what the visible statements do.
38 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
// Skip ASCII whitespace, never reading at or past m_end.
40 while (m_ptr < m_end && isASCIISpace(*m_ptr))
43 ASSERT(m_ptr <= m_end);
// Zero-length span at the current position (start == end == m_ptr).
46 token.start = token.end = m_ptr;
// Token type is set to TokError here; the assignments below replace it with a
// concrete type. The conditions selecting each assignment are not visible.
49 token.type = TokError;
// Punctuator token types. Presumably selected by the characters
// '[' ']' '(' ')' '{' '}' ',' ':' respectively -- TODO confirm against the
// full switch.
53 token.type = TokLBracket;
57 token.type = TokRBracket;
61 token.type = TokLParen;
65 token.type = TokRParen;
69 token.type = TokLBrace;
73 token.type = TokRBrace;
77 token.type = TokComma;
81 token.type = TokColon;
// Strings are lexed by the lexString instantiation matching the lexer's mode;
// the mode is a template argument, so it has to be dispatched at run time here.
85 if (m_mode == StrictJSON)
86 return lexString<StrictJSON>(token);
87 return lexString<NonStrictJSON>(token);
// Keyword matching: each test first checks that enough characters remain
// (4 or 5) and then compares the characters after the first one. The first
// character is presumably matched by the enclosing case label (not visible).
// Matches "rue" at m_ptr[1..3].
89 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
// Matches "alse" at m_ptr[1..4].
97 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
99 token.type = TokFalse;
// Matches "ull" at m_ptr[1..3].
105 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
107 token.type = TokNull;
// Numeric tokens are delegated to lexNumber.
123 return lexNumber(token);
// Returns true when `c` is a tab, or when all of the following hold:
//   - c is at or above the space character (i.e. not a control character),
//   - c is neither a backslash nor a double quote,
//   - in any mode other than StrictJSON, c is additionally <= 0xff.
// lexString uses this predicate to delimit runs of characters that it appends
// to the result without further processing.
128 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
130 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
// Lexer::lexString<mode>: lexes a string literal, building its value in a
// UStringBuilder and storing it in token.stringToken with type TokString.
// NOTE(review): this listing omits lines (the loop head, the escape switch
// labels, pointer advances and failure returns are not visible); comments
// describe the visible statements only.
133 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
134 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
// runStart marks where the current run of safe characters began; builder
// accumulates the string contents.
137 const UChar* runStart;
138 UStringBuilder builder;
// Scan a run of characters accepted by isSafeStringCharacter<mode>.
141 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
// Append the run just scanned, if it is non-empty.
143 if (runStart < m_ptr)
144 builder.append(runStart, m_ptr - runStart);
// Escape sequences are only handled here in StrictJSON mode, and only when
// the scan stopped on a backslash.
145 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
// Characters appended for the single-character escapes: backslash, backspace,
// form feed, newline, carriage return, tab. The case labels that select each
// append are not visible here.
155 builder.append('\\');
163 builder.append('\b');
167 builder.append('\f');
171 builder.append('\n');
175 builder.append('\r');
179 builder.append('\t');
// Unicode escape: at least five characters ('u' plus four digits) must remain.
184 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
// All four characters following the 'u' must be hexadecimal digits.
186 for (int i = 1; i < 5; i++) {
187 if (!isASCIIHexDigit(m_ptr[i]))
// Convert the four hex digits to a character and append it.
190 builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
// Keep looping only in StrictJSON mode, and only while the last iteration
// made progress (m_ptr moved past runStart), input remains, and the current
// character is not the closing double quote.
198 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
// The scan must have stopped on a double quote inside the input; the action
// taken when it did not is not visible here.
200 if (m_ptr >= m_end || *m_ptr != '"')
// Publish the accumulated string on the token.
203 token.stringToken = builder.toUString();
204 token.type = TokString;
// Lexer::lexNumber: recognizes a number following the grammar quoted below,
// then converts the recognized characters to a double with WTF::strtod and
// stores it in token.numberToken with type TokNumber.
// NOTE(review): this listing omits lines (pointer advances, failure returns,
// some declarations); comments describe the visible statements only.
209 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
211 // ES5 and json.org define numbers as
218 // -? digit1-9 digits?
223 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
// Optional leading minus sign.
225 if (m_ptr < m_end && *m_ptr == '-') // -?
// Integer part: either a single '0', or a nonzero digit followed by any
// number of digits.
229 if (m_ptr < m_end && *m_ptr == '0') // 0
231 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
234 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
// Optional fraction: a '.' that must be followed by at least one digit.
240 if (m_ptr < m_end && *m_ptr == '.') {
243 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
247 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
251 // ([eE][+-]? [0-9]+)?
252 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
// Optional exponent sign.
256 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
// The exponent needs at least one digit.
260 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
264 while (m_ptr < m_end && isASCIIDigit(*m_ptr))
268 token.type = TokNumber;
// Buffer holds the token narrowed to char, with one extra element
// (presumably for a terminating NUL written on a line not visible here).
270 Vector<char, 64> buffer(token.end - token.start + 1);
// Copy the token character by character; the ASSERT checks that narrowing
// each UChar to char does not change its value.
272 for (i = 0; i < token.end - token.start; i++) {
273 ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
274 buffer[i] = static_cast<char>(token.start[i]);
278 token.numberToken = WTF::strtod(buffer.data(), &end);
// strtod must have consumed exactly the characters that were lexed.
279 ASSERT(buffer.data() + (token.end - token.start) == end);
// LiteralParser::parse: builds a JSValue from the token stream, starting in
// `initialState`. It is written as an explicit state machine driven by `state`
// with goto labels mirroring the case labels, and it keeps its own stacks
// rather than using the call stack for nested arrays and objects.
// NOTE(review): this listing omits lines (the outer loop/switch, several case
// labels, returns, and the declaration of lastValue are not visible);
// comments describe the visible statements only.
283 JSValue LiteralParser::parse(ParserState initialState)
285 ParserState state = initialState;
// Arrays/objects currently under construction; the innermost one is last().
286 MarkedArgumentBuffer objectStack;
// States to resume once the sub-expression being parsed has produced a value.
288 Vector<ParserState, 16> stateStack;
// Property names whose values are still being parsed.
289 Vector<Identifier, 16> identifierStack;
// Begin an array: create it and make it the innermost container.
293 case StartParseArray: {
294 JSArray* array = constructEmptyArray(m_exec);
295 objectStack.append(array);
298 doParseArrayStartExpression:
299 case DoParseArrayStartExpression: {
// Remember the token type before advancing, so that a ']' directly after a
// ',' can be told apart from a ']' after '['. The action taken for the
// comma case is not visible here.
300 TokenType lastToken = m_lexer.currentToken().type;
301 if (m_lexer.next() == TokRBracket) {
302 if (lastToken == TokComma)
// Array finished: it becomes the last value and is popped.
305 lastValue = objectStack.last();
306 objectStack.removeLast();
// Otherwise parse an element, resuming at DoParseArrayEndExpression afterwards.
310 stateStack.append(DoParseArrayEndExpression);
311 goto startParseExpression;
313 case DoParseArrayEndExpression: {
// Append the element just parsed to the innermost array.
314 asArray(objectStack.last())->push(m_exec, lastValue);
// A comma means another element follows.
316 if (m_lexer.currentToken().type == TokComma)
317 goto doParseArrayStartExpression;
// Anything other than ',' must be ']'; the failure action is not visible here.
319 if (m_lexer.currentToken().type != TokRBracket)
323 lastValue = objectStack.last();
324 objectStack.removeLast();
// Begin an object: create it and make it the innermost container.
328 case StartParseObject: {
329 JSObject* object = constructEmptyObject(m_exec);
330 objectStack.append(object);
332 TokenType type = m_lexer.next();
333 if (type == TokString) {
// Copy the name token before advancing the lexer to the expected ':'.
334 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
337 if (m_lexer.next() != TokColon)
// Save the property name and parse its value, resuming at
// DoParseObjectEndExpression afterwards.
341 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
342 stateStack.append(DoParseObjectEndExpression);
343 goto startParseExpression;
// The only other token accepted here is '}'; the failure action is not visible.
344 } else if (type != TokRBrace)
// Object finished: it becomes the last value and is popped.
347 lastValue = objectStack.last();
348 objectStack.removeLast();
351 doParseObjectStartExpression:
352 case DoParseObjectStartExpression: {
// Reached after a ',' inside an object: the next token is tested against
// TokString (the action on mismatch is not visible here).
353 TokenType type = m_lexer.next();
354 if (type != TokString)
356 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
359 if (m_lexer.next() != TokColon)
363 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
364 stateStack.append(DoParseObjectEndExpression);
365 goto startParseExpression;
367 case DoParseObjectEndExpression:
// Store the value just parsed under the pending property name, then drop
// that name.
369 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
370 identifierStack.removeLast();
// A comma means another property follows.
371 if (m_lexer.currentToken().type == TokComma)
372 goto doParseObjectStartExpression;
// Anything other than ',' must be '}'; the failure action is not visible here.
373 if (m_lexer.currentToken().type != TokRBrace)
376 lastValue = objectStack.last();
377 objectStack.removeLast();
380 startParseExpression:
381 case StartParseExpression: {
// Dispatch on the current token. The case labels are not visible; the
// visible bodies start a nested array or object, or set lastValue to a
// string, number, null, true or false.
382 switch (m_lexer.currentToken().type) {
384 goto startParseArray;
386 goto startParseObject;
388 Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
390 lastValue = jsString(m_exec, stringToken.stringToken);
394 Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
396 lastValue = jsNumber(m_exec, numberToken.numberToken);
401 lastValue = jsNull();
406 lastValue = jsBoolean(true);
411 lastValue = jsBoolean(false);
420 case StartParseStatement: {
// Dispatch on the current token (case labels not visible). One path parses
// an expression directly; the other first pushes
// StartParseStatementEndStatement so the token after the expression is
// checked there.
421 switch (m_lexer.currentToken().type) {
425 goto startParseExpression;
429 stateStack.append(StartParseStatementEndStatement);
430 goto startParseExpression;
436 case StartParseStatementEndStatement: {
// This state is only reached with nothing left to resume.
437 ASSERT(stateStack.isEmpty());
// The current token is tested against ')' and the following one against end
// of input; the actions taken on each outcome are not visible here.
438 if (m_lexer.currentToken().type != TokRParen)
440 if (m_lexer.next() == TokEnd)
445 ASSERT_NOT_REACHED();
// A value has been produced. The action for an empty stateStack is not
// visible here (presumably the parse is complete -- TODO confirm); otherwise
// the most recently pushed state is popped and becomes the current state.
447 if (stateStack.isEmpty())
449 state = stateStack.last();
450 stateStack.removeLast();