| /* |
| * Copyright (C) 2009 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "LiteralParser.h" |
| |
| #include "JSArray.h" |
| #include "JSString.h" |
| #include "Lexer.h" |
| #include "UStringBuilder.h" |
| #include <wtf/ASCIICType.h> |
| #include <wtf/dtoa.h> |
| |
| namespace JSC { |
| |
| static inline bool isJSONWhiteSpace(const UChar& c) |
| { |
| // The JSON RFC 4627 defines a list of allowed characters to be considered |
| // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar). |
| return c == ' ' || c == 0x9 || c == 0xA || c == 0xD; |
| } |
| |
| LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) |
| { |
| while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr)) |
| ++m_ptr; |
| |
| ASSERT(m_ptr <= m_end); |
| if (m_ptr >= m_end) { |
| token.type = TokEnd; |
| token.start = token.end = m_ptr; |
| return TokEnd; |
| } |
| token.type = TokError; |
| token.start = m_ptr; |
| switch (*m_ptr) { |
| case '[': |
| token.type = TokLBracket; |
| token.end = ++m_ptr; |
| return TokLBracket; |
| case ']': |
| token.type = TokRBracket; |
| token.end = ++m_ptr; |
| return TokRBracket; |
| case '(': |
| token.type = TokLParen; |
| token.end = ++m_ptr; |
| return TokLBracket; |
| case ')': |
| token.type = TokRParen; |
| token.end = ++m_ptr; |
| return TokRBracket; |
| case '{': |
| token.type = TokLBrace; |
| token.end = ++m_ptr; |
| return TokLBrace; |
| case '}': |
| token.type = TokRBrace; |
| token.end = ++m_ptr; |
| return TokRBrace; |
| case ',': |
| token.type = TokComma; |
| token.end = ++m_ptr; |
| return TokComma; |
| case ':': |
| token.type = TokColon; |
| token.end = ++m_ptr; |
| return TokColon; |
| case '"': |
| if (m_mode == StrictJSON) |
| return lexString<StrictJSON>(token); |
| return lexString<NonStrictJSON>(token); |
| case 't': |
| if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { |
| m_ptr += 4; |
| token.type = TokTrue; |
| token.end = m_ptr; |
| return TokTrue; |
| } |
| break; |
| case 'f': |
| if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') { |
| m_ptr += 5; |
| token.type = TokFalse; |
| token.end = m_ptr; |
| return TokFalse; |
| } |
| break; |
| case 'n': |
| if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') { |
| m_ptr += 4; |
| token.type = TokNull; |
| token.end = m_ptr; |
| return TokNull; |
| } |
| break; |
| case '-': |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| return lexNumber(token); |
| } |
| return TokError; |
| } |
| |
| template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c) |
| { |
| return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; |
| } |
| |
| // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions. |
| template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) |
| { |
| ++m_ptr; |
| const UChar* runStart; |
| UStringBuilder builder; |
| do { |
| runStart = m_ptr; |
| while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr)) |
| ++m_ptr; |
| if (runStart < m_ptr) |
| builder.append(runStart, m_ptr - runStart); |
| if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') { |
| ++m_ptr; |
| if (m_ptr >= m_end) |
| return TokError; |
| switch (*m_ptr) { |
| case '"': |
| builder.append('"'); |
| m_ptr++; |
| break; |
| case '\\': |
| builder.append('\\'); |
| m_ptr++; |
| break; |
| case '/': |
| builder.append('/'); |
| m_ptr++; |
| break; |
| case 'b': |
| builder.append('\b'); |
| m_ptr++; |
| break; |
| case 'f': |
| builder.append('\f'); |
| m_ptr++; |
| break; |
| case 'n': |
| builder.append('\n'); |
| m_ptr++; |
| break; |
| case 'r': |
| builder.append('\r'); |
| m_ptr++; |
| break; |
| case 't': |
| builder.append('\t'); |
| m_ptr++; |
| break; |
| |
| case 'u': |
| if ((m_end - m_ptr) < 5) // uNNNN == 5 characters |
| return TokError; |
| for (int i = 1; i < 5; i++) { |
| if (!isASCIIHexDigit(m_ptr[i])) |
| return TokError; |
| } |
| builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4])); |
| m_ptr += 5; |
| break; |
| |
| default: |
| return TokError; |
| } |
| } |
| } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"'); |
| |
| if (m_ptr >= m_end || *m_ptr != '"') |
| return TokError; |
| |
| token.stringToken = builder.toUString(); |
| token.type = TokString; |
| token.end = ++m_ptr; |
| return TokString; |
| } |
| |
| LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token) |
| { |
| // ES5 and json.org define numbers as |
| // number |
| // int |
| // int frac? exp? |
| // |
| // int |
| // -? 0 |
| // -? digit1-9 digits? |
| // |
| // digits |
| // digit digits? |
| // |
| // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)? |
| |
| if (m_ptr < m_end && *m_ptr == '-') // -? |
| ++m_ptr; |
| |
| // (0 | [1-9][0-9]*) |
| if (m_ptr < m_end && *m_ptr == '0') // 0 |
| ++m_ptr; |
| else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9] |
| ++m_ptr; |
| // [0-9]* |
| while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
| ++m_ptr; |
| } else |
| return TokError; |
| |
| // ('.' [0-9]+)? |
| if (m_ptr < m_end && *m_ptr == '.') { |
| ++m_ptr; |
| // [0-9]+ |
| if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) |
| return TokError; |
| |
| ++m_ptr; |
| while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
| ++m_ptr; |
| } |
| |
| // ([eE][+-]? [0-9]+)? |
| if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE] |
| ++m_ptr; |
| |
| // [-+]? |
| if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+')) |
| ++m_ptr; |
| |
| // [0-9]+ |
| if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) |
| return TokError; |
| |
| ++m_ptr; |
| while (m_ptr < m_end && isASCIIDigit(*m_ptr)) |
| ++m_ptr; |
| } |
| |
| token.type = TokNumber; |
| token.end = m_ptr; |
| Vector<char, 64> buffer(token.end - token.start + 1); |
| int i; |
| for (i = 0; i < token.end - token.start; i++) { |
| ASSERT(static_cast<char>(token.start[i]) == token.start[i]); |
| buffer[i] = static_cast<char>(token.start[i]); |
| } |
| buffer[i] = 0; |
| char* end; |
| token.numberToken = WTF::strtod(buffer.data(), &end); |
| ASSERT(buffer.data() + (token.end - token.start) == end); |
| return TokNumber; |
| } |
| |
| JSValue LiteralParser::parse(ParserState initialState) |
| { |
| ParserState state = initialState; |
| MarkedArgumentBuffer objectStack; |
| JSValue lastValue; |
| Vector<ParserState, 16> stateStack; |
| Vector<Identifier, 16> identifierStack; |
| while (1) { |
| switch(state) { |
| startParseArray: |
| case StartParseArray: { |
| JSArray* array = constructEmptyArray(m_exec); |
| objectStack.append(array); |
| // fallthrough |
| } |
| doParseArrayStartExpression: |
| case DoParseArrayStartExpression: { |
| TokenType lastToken = m_lexer.currentToken().type; |
| if (m_lexer.next() == TokRBracket) { |
| if (lastToken == TokComma) |
| return JSValue(); |
| m_lexer.next(); |
| lastValue = objectStack.last(); |
| objectStack.removeLast(); |
| break; |
| } |
| |
| stateStack.append(DoParseArrayEndExpression); |
| goto startParseExpression; |
| } |
| case DoParseArrayEndExpression: { |
| asArray(objectStack.last())->push(m_exec, lastValue); |
| |
| if (m_lexer.currentToken().type == TokComma) |
| goto doParseArrayStartExpression; |
| |
| if (m_lexer.currentToken().type != TokRBracket) |
| return JSValue(); |
| |
| m_lexer.next(); |
| lastValue = objectStack.last(); |
| objectStack.removeLast(); |
| break; |
| } |
| startParseObject: |
| case StartParseObject: { |
| JSObject* object = constructEmptyObject(m_exec); |
| objectStack.append(object); |
| |
| TokenType type = m_lexer.next(); |
| if (type == TokString) { |
| Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); |
| |
| // Check for colon |
| if (m_lexer.next() != TokColon) |
| return JSValue(); |
| |
| m_lexer.next(); |
| identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); |
| stateStack.append(DoParseObjectEndExpression); |
| goto startParseExpression; |
| } else if (type != TokRBrace) |
| return JSValue(); |
| m_lexer.next(); |
| lastValue = objectStack.last(); |
| objectStack.removeLast(); |
| break; |
| } |
| doParseObjectStartExpression: |
| case DoParseObjectStartExpression: { |
| TokenType type = m_lexer.next(); |
| if (type != TokString) |
| return JSValue(); |
| Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); |
| |
| // Check for colon |
| if (m_lexer.next() != TokColon) |
| return JSValue(); |
| |
| m_lexer.next(); |
| identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); |
| stateStack.append(DoParseObjectEndExpression); |
| goto startParseExpression; |
| } |
| case DoParseObjectEndExpression: |
| { |
| asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue); |
| identifierStack.removeLast(); |
| if (m_lexer.currentToken().type == TokComma) |
| goto doParseObjectStartExpression; |
| if (m_lexer.currentToken().type != TokRBrace) |
| return JSValue(); |
| m_lexer.next(); |
| lastValue = objectStack.last(); |
| objectStack.removeLast(); |
| break; |
| } |
| startParseExpression: |
| case StartParseExpression: { |
| switch (m_lexer.currentToken().type) { |
| case TokLBracket: |
| goto startParseArray; |
| case TokLBrace: |
| goto startParseObject; |
| case TokString: { |
| Lexer::LiteralParserToken stringToken = m_lexer.currentToken(); |
| m_lexer.next(); |
| lastValue = jsString(m_exec, stringToken.stringToken); |
| break; |
| } |
| case TokNumber: { |
| Lexer::LiteralParserToken numberToken = m_lexer.currentToken(); |
| m_lexer.next(); |
| lastValue = jsNumber(numberToken.numberToken); |
| break; |
| } |
| case TokNull: |
| m_lexer.next(); |
| lastValue = jsNull(); |
| break; |
| |
| case TokTrue: |
| m_lexer.next(); |
| lastValue = jsBoolean(true); |
| break; |
| |
| case TokFalse: |
| m_lexer.next(); |
| lastValue = jsBoolean(false); |
| break; |
| |
| default: |
| // Error |
| return JSValue(); |
| } |
| break; |
| } |
| case StartParseStatement: { |
| switch (m_lexer.currentToken().type) { |
| case TokLBracket: |
| case TokNumber: |
| case TokString: |
| goto startParseExpression; |
| |
| case TokLParen: { |
| m_lexer.next(); |
| stateStack.append(StartParseStatementEndStatement); |
| goto startParseExpression; |
| } |
| default: |
| return JSValue(); |
| } |
| } |
| case StartParseStatementEndStatement: { |
| ASSERT(stateStack.isEmpty()); |
| if (m_lexer.currentToken().type != TokRParen) |
| return JSValue(); |
| if (m_lexer.next() == TokEnd) |
| return lastValue; |
| return JSValue(); |
| } |
| default: |
| ASSERT_NOT_REACHED(); |
| } |
| if (stateStack.isEmpty()) |
| return lastValue; |
| state = stateStack.last(); |
| stateStack.removeLast(); |
| continue; |
| } |
| } |
| |
| } |