| // Copyright 2012 the V8 project authors. All rights reserved. |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following |
| // disclaimer in the documentation and/or other materials provided |
| // with the distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived |
| // from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #ifndef V8_PREPARSER_H |
| #define V8_PREPARSER_H |
| |
| #include "hashmap.h" |
| #include "token.h" |
| #include "scanner.h" |
| |
| namespace v8 { |
| |
| namespace internal { |
| class UnicodeCache; |
| } |
| |
| namespace preparser { |
| |
| typedef uint8_t byte; |
| |
// Preparsing checks a JavaScript program and emits preparse data that lets
// a later full parse run faster.
// See preparse-data-format.h for the data format.

// The PreParser checks that the syntax follows the JavaScript grammar and
// collects some information about the program along the way.
// The grammar check is performed only to understand the program well enough
// to deduce information that can be used to speed up later parsing. Finding
// errors is not the goal of pre-parsing; the goal is to speed up properly
// written, correct programs.
// That means that contextual checks (like a label being declared where
// it is used) are generally omitted.
| |
| namespace i = v8::internal; |
| |
| class DuplicateFinder { |
| public: |
| explicit DuplicateFinder(i::UnicodeCache* constants) |
| : unicode_constants_(constants), |
| backing_store_(16), |
| map_(&Match) { } |
| |
| int AddAsciiSymbol(i::Vector<const char> key, int value); |
| int AddUtf16Symbol(i::Vector<const uint16_t> key, int value); |
| // Add a a number literal by converting it (if necessary) |
| // to the string that ToString(ToNumber(literal)) would generate. |
| // and then adding that string with AddAsciiSymbol. |
| // This string is the actual value used as key in an object literal, |
| // and the one that must be different from the other keys. |
| int AddNumber(i::Vector<const char> key, int value); |
| |
| private: |
| int AddSymbol(i::Vector<const byte> key, bool is_ascii, int value); |
| // Backs up the key and its length in the backing store. |
| // The backup is stored with a base 127 encoding of the |
| // length (plus a bit saying whether the string is ASCII), |
| // followed by the bytes of the key. |
| byte* BackupKey(i::Vector<const byte> key, bool is_ascii); |
| |
| // Compare two encoded keys (both pointing into the backing store) |
| // for having the same base-127 encoded lengths and ASCII-ness, |
| // and then having the same 'length' bytes following. |
| static bool Match(void* first, void* second); |
| // Creates a hash from a sequence of bytes. |
| static uint32_t Hash(i::Vector<const byte> key, bool is_ascii); |
| // Checks whether a string containing a JS number is its canonical |
| // form. |
| static bool IsNumberCanonical(i::Vector<const char> key); |
| |
| // Size of buffer. Sufficient for using it to call DoubleToCString in |
| // from conversions.h. |
| static const int kBufferSize = 100; |
| |
| i::UnicodeCache* unicode_constants_; |
| // Backing store used to store strings used as hashmap keys. |
| i::SequenceCollector<unsigned char> backing_store_; |
| i::HashMap map_; |
| // Buffer used for string->number->canonical string conversions. |
| char number_buffer_[kBufferSize]; |
| }; |
| |
| |
| class PreParser { |
| public: |
// Outcome of a preparse run.
enum PreParseResult {
  // The run was abandoned because the stack limit was reached.
  kPreParseStackOverflow,
  // The run finished; any syntax error has been reported to the log.
  kPreParseSuccess
};
| |
| |
| PreParser(i::Scanner* scanner, |
| i::ParserRecorder* log, |
| uintptr_t stack_limit, |
| bool allow_lazy, |
| bool allow_natives_syntax, |
| bool allow_modules) |
| : scanner_(scanner), |
| log_(log), |
| scope_(NULL), |
| stack_limit_(stack_limit), |
| strict_mode_violation_location_(i::Scanner::Location::invalid()), |
| strict_mode_violation_type_(NULL), |
| stack_overflow_(false), |
| allow_lazy_(allow_lazy), |
| allow_modules_(allow_modules), |
| allow_natives_syntax_(allow_natives_syntax), |
| parenthesized_function_(false), |
| harmony_scoping_(scanner->HarmonyScoping()) { } |
| |
| ~PreParser() {} |
| |
| // Pre-parse the program from the character stream; returns true on |
| // success (even if parsing failed, the pre-parse data successfully |
| // captured the syntax error), and false if a stack-overflow happened |
| // during parsing. |
| static PreParseResult PreParseProgram(i::Scanner* scanner, |
| i::ParserRecorder* log, |
| int flags, |
| uintptr_t stack_limit) { |
| bool allow_lazy = (flags & i::kAllowLazy) != 0; |
| bool allow_natives_syntax = (flags & i::kAllowNativesSyntax) != 0; |
| bool allow_modules = (flags & i::kAllowModules) != 0; |
| return PreParser(scanner, log, stack_limit, allow_lazy, |
| allow_natives_syntax, allow_modules).PreParse(); |
| } |
| |
| // Parses a single function literal, from the opening parentheses before |
| // parameters to the closing brace after the body. |
| // Returns a FunctionEntry describing the body of the funciton in enough |
| // detail that it can be lazily compiled. |
| // The scanner is expected to have matched the "function" keyword and |
| // parameters, and have consumed the initial '{'. |
| // At return, unless an error occured, the scanner is positioned before the |
| // the final '}'. |
| PreParseResult PreParseLazyFunction(i::LanguageMode mode, |
| i::ParserRecorder* log); |
| |
| private: |
// Used to detect duplicates in object literals. Each of kGetterProperty,
// kSetterProperty and kValueProperty stands for one kind of object literal
// property. When a property is parsed, the value for its kind is stored in
// the DuplicateFinder under the property name.
// The values are chosen so that two kinds sharing a bit are incompatible.
// For example, a getter may be added to a property that already has a
// setter, because kGetterProperty and kSetterProperty share no bit, but not
// to one that already has a getter or a value. Adding the getter to an
// existing setter stores (kGetterProperty | kSetterProperty), which is
// incompatible with every further property kind.
enum PropertyType {
  kNone = 0,
  // Bit patterns representing the different object literal property kinds.
  kGetterProperty = 1,
  kSetterProperty = 2,
  kValueProperty = 7,
  // Helper constant: the bit that only kValueProperty carries.
  kValueFlag = 4
};

// The helpers below classify a conflict between two PropertyType values.

// True when the two kinds have at least one bit in common.
bool HasConflict(int type1, int type2) {
  const int shared_bits = type1 & type2;
  return shared_bits != 0;
}

// True when both kinds carry the value flag.
bool IsDataDataConflict(int type1, int type2) {
  return (type1 & kValueFlag) != 0 && (type2 & kValueFlag) != 0;
}

// True when exactly one of the two kinds carries the value flag.
bool IsDataAccessorConflict(int type1, int type2) {
  const bool first_is_data = (type1 & kValueFlag) != 0;
  const bool second_is_data = (type2 & kValueFlag) != 0;
  return first_is_data != second_is_data;
}

// True when neither kind carries the value flag.
bool IsAccessorAccessorConflict(int type1, int type2) {
  return (type1 & kValueFlag) == 0 && (type2 & kValueFlag) == 0;
}
| |
| |
| void CheckDuplicate(DuplicateFinder* finder, |
| i::Token::Value property, |
| int type, |
| bool* ok); |
| |
// These types form an algebra over syntactic categories that is just
// rich enough to let us recognize and propagate the constructs that
// are either counted in the preparser data or are needed in order to
// throw the correct syntax error exceptions.

// Kind of scope tracked by Scope.
enum ScopeType {
  kTopLevelScope,
  kFunctionScope
};

// Syntactic position in which a variable declaration appears.
enum VariableDeclarationContext {
  kSourceElement,
  kStatement,
  kForStatement
};

// Whether a list of variable declarations includes any initializers.
enum VariableDeclarationProperties {
  kHasInitializers,
  kHasNoInitializers
};
| |
class Expression;

// Classification of an identifier. Only the category is stored, not the
// name: unknown (any ordinary identifier), future reserved word, future
// strict reserved word, "eval" or "arguments".
// Instances are created through the static factory functions; the
// constructor is private and Expression is a friend so that it can pack
// the category into an expression code and unpack it again.
class Identifier {
 public:
  static Identifier Default() {
    return Identifier(kUnknownIdentifier);
  }
  static Identifier Eval() {
    return Identifier(kEvalIdentifier);
  }
  static Identifier Arguments() {
    return Identifier(kArgumentsIdentifier);
  }
  static Identifier FutureReserved() {
    return Identifier(kFutureReservedIdentifier);
  }
  static Identifier FutureStrictReserved() {
    return Identifier(kFutureStrictReservedIdentifier);
  }

  // The predicates only read type_, so they are const-qualified and can be
  // called on const objects and through const references.
  bool IsEval() const { return type_ == kEvalIdentifier; }
  bool IsArguments() const { return type_ == kArgumentsIdentifier; }
  // Relies on kEvalIdentifier and kArgumentsIdentifier being the last two
  // enumerators of Type.
  bool IsEvalOrArguments() const { return type_ >= kEvalIdentifier; }
  bool IsFutureReserved() const { return type_ == kFutureReservedIdentifier; }
  bool IsFutureStrictReserved() const {
    return type_ == kFutureStrictReservedIdentifier;
  }
  // Only identifiers in the unknown category qualify.
  bool IsValidStrictVariable() const { return type_ == kUnknownIdentifier; }

 private:
  // The order matters: IsEvalOrArguments() uses an ordered comparison.
  enum Type {
    kUnknownIdentifier,
    kFutureReservedIdentifier,
    kFutureStrictReservedIdentifier,
    kEvalIdentifier,
    kArgumentsIdentifier
  };
  explicit Identifier(Type type) : type_(type) { }
  Type type_;

  friend class Expression;
};
| |
| // Bits 0 and 1 are used to identify the type of expression: |
| // If bit 0 is set, it's an identifier. |
| // if bit 1 is set, it's a string literal. |
| // If neither is set, it's no particular type, and both set isn't |
| // use yet. |
| // Bit 2 is used to mark the expression as being parenthesized, |
| // so "(foo)" isn't recognized as a pure identifier (and possible label). |
| class Expression { |
| public: |
| static Expression Default() { |
| return Expression(kUnknownExpression); |
| } |
| |
| static Expression FromIdentifier(Identifier id) { |
| return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift)); |
| } |
| |
| static Expression StringLiteral() { |
| return Expression(kUnknownStringLiteral); |
| } |
| |
| static Expression UseStrictStringLiteral() { |
| return Expression(kUseStrictString); |
| } |
| |
| static Expression This() { |
| return Expression(kThisExpression); |
| } |
| |
| static Expression ThisProperty() { |
| return Expression(kThisPropertyExpression); |
| } |
| |
| static Expression StrictFunction() { |
| return Expression(kStrictFunctionExpression); |
| } |
| |
| bool IsIdentifier() { |
| return (code_ & kIdentifierFlag) != 0; |
| } |
| |
| // Only works corretly if it is actually an identifier expression. |
| PreParser::Identifier AsIdentifier() { |
| return PreParser::Identifier( |
| static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift)); |
| } |
| |
| bool IsParenthesized() { |
| // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized. |
| return (code_ & 7) > 4; |
| } |
| |
| bool IsRawIdentifier() { |
| return !IsParenthesized() && IsIdentifier(); |
| } |
| |
| bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; } |
| |
| bool IsRawStringLiteral() { |
| return !IsParenthesized() && IsStringLiteral(); |
| } |
| |
| bool IsUseStrictLiteral() { |
| return (code_ & kStringLiteralMask) == kUseStrictString; |
| } |
| |
| bool IsThis() { |
| return code_ == kThisExpression; |
| } |
| |
| bool IsThisProperty() { |
| return code_ == kThisPropertyExpression; |
| } |
| |
| bool IsStrictFunction() { |
| return code_ == kStrictFunctionExpression; |
| } |
| |
| Expression Parenthesize() { |
| int type = code_ & 3; |
| if (type != 0) { |
| // Identifiers and string literals can be parenthesized. |
| // They no longer work as labels or directive prologues, |
| // but are still recognized in other contexts. |
| return Expression(code_ | kParentesizedExpressionFlag); |
| } |
| // For other types of expressions, it's not important to remember |
| // the parentheses. |
| return *this; |
| } |
| |
| private: |
| // First two/three bits are used as flags. |
| // Bit 0 and 1 represent identifiers or strings literals, and are |
| // mutually exclusive, but can both be absent. |
| // If bit 0 or 1 are set, bit 2 marks that the expression has |
| // been wrapped in parentheses (a string literal can no longer |
| // be a directive prologue, and an identifier can no longer be |
| // a label. |
| enum { |
| kUnknownExpression = 0, |
| // Identifiers |
| kIdentifierFlag = 1, // Used to detect labels. |
| kIdentifierShift = 3, |
| |
| kStringLiteralFlag = 2, // Used to detect directive prologue. |
| kUnknownStringLiteral = kStringLiteralFlag, |
| kUseStrictString = kStringLiteralFlag | 8, |
| kStringLiteralMask = kUseStrictString, |
| |
| kParentesizedExpressionFlag = 4, // Only if identifier or string literal. |
| |
| // Below here applies if neither identifier nor string literal. |
| kThisExpression = 4, |
| kThisPropertyExpression = 8, |
| kStrictFunctionExpression = 12 |
| }; |
| |
| explicit Expression(int expression_code) : code_(expression_code) { } |
| |
| int code_; |
| }; |
| |
| class Statement { |
| public: |
| static Statement Default() { |
| return Statement(kUnknownStatement); |
| } |
| |
| static Statement FunctionDeclaration() { |
| return Statement(kFunctionDeclaration); |
| } |
| |
| // Creates expression statement from expression. |
| // Preserves being an unparenthesized string literal, possibly |
| // "use strict". |
| static Statement ExpressionStatement(Expression expression) { |
| if (!expression.IsParenthesized()) { |
| if (expression.IsUseStrictLiteral()) { |
| return Statement(kUseStrictExpressionStatement); |
| } |
| if (expression.IsStringLiteral()) { |
| return Statement(kStringLiteralExpressionStatement); |
| } |
| } |
| return Default(); |
| } |
| |
| bool IsStringLiteral() { |
| return code_ != kUnknownStatement; |
| } |
| |
| bool IsUseStrictLiteral() { |
| return code_ == kUseStrictExpressionStatement; |
| } |
| |
| bool IsFunctionDeclaration() { |
| return code_ == kFunctionDeclaration; |
| } |
| |
| private: |
| enum Type { |
| kUnknownStatement, |
| kStringLiteralExpressionStatement, |
| kUseStrictExpressionStatement, |
| kFunctionDeclaration |
| }; |
| |
| explicit Statement(Type code) : code_(code) {} |
| Type code_; |
| }; |
| |
// Result type of ParseSourceElements; it has a single value.
enum SourceElements {
  kUnknownSourceElements
};

// Result type of ParseArguments.
typedef int Arguments;
| |
| class Scope { |
| public: |
| Scope(Scope** variable, ScopeType type) |
| : variable_(variable), |
| prev_(*variable), |
| type_(type), |
| materialized_literal_count_(0), |
| expected_properties_(0), |
| with_nesting_count_(0), |
| language_mode_( |
| (prev_ != NULL) ? prev_->language_mode() : i::CLASSIC_MODE) { |
| *variable = this; |
| } |
| ~Scope() { *variable_ = prev_; } |
| void NextMaterializedLiteralIndex() { materialized_literal_count_++; } |
| void AddProperty() { expected_properties_++; } |
| ScopeType type() { return type_; } |
| int expected_properties() { return expected_properties_; } |
| int materialized_literal_count() { return materialized_literal_count_; } |
| bool IsInsideWith() { return with_nesting_count_ != 0; } |
| bool is_classic_mode() { |
| return language_mode_ == i::CLASSIC_MODE; |
| } |
| i::LanguageMode language_mode() { |
| return language_mode_; |
| } |
| void set_language_mode(i::LanguageMode language_mode) { |
| language_mode_ = language_mode; |
| } |
| void EnterWith() { with_nesting_count_++; } |
| void LeaveWith() { with_nesting_count_--; } |
| |
| private: |
| Scope** const variable_; |
| Scope* const prev_; |
| const ScopeType type_; |
| int materialized_literal_count_; |
| int expected_properties_; |
| int with_nesting_count_; |
| i::LanguageMode language_mode_; |
| }; |
| |
| // Preparse the program. Only called in PreParseProgram after creating |
| // the instance. |
| PreParseResult PreParse() { |
| Scope top_scope(&scope_, kTopLevelScope); |
| bool ok = true; |
| int start_position = scanner_->peek_location().beg_pos; |
| ParseSourceElements(i::Token::EOS, &ok); |
| if (stack_overflow_) return kPreParseStackOverflow; |
| if (!ok) { |
| ReportUnexpectedToken(scanner_->current_token()); |
| } else if (!scope_->is_classic_mode()) { |
| CheckOctalLiteral(start_position, scanner_->location().end_pos, &ok); |
| } |
| return kPreParseSuccess; |
| } |
| |
| // Report syntax error |
| void ReportUnexpectedToken(i::Token::Value token); |
| void ReportMessageAt(i::Scanner::Location location, |
| const char* type, |
| const char* name_opt) { |
| log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt); |
| } |
| void ReportMessageAt(int start_pos, |
| int end_pos, |
| const char* type, |
| const char* name_opt) { |
| log_->LogMessage(start_pos, end_pos, type, name_opt); |
| } |
| |
| void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok); |
| |
| // All ParseXXX functions take as the last argument an *ok parameter |
| // which is set to false if parsing failed; it is unchanged otherwise. |
| // By making the 'exception handling' explicit, we are forced to check |
| // for failure at the call sites. |
| Statement ParseSourceElement(bool* ok); |
| SourceElements ParseSourceElements(int end_token, bool* ok); |
| Statement ParseStatement(bool* ok); |
| Statement ParseFunctionDeclaration(bool* ok); |
| Statement ParseBlock(bool* ok); |
| Statement ParseVariableStatement(VariableDeclarationContext var_context, |
| bool* ok); |
| Statement ParseVariableDeclarations(VariableDeclarationContext var_context, |
| VariableDeclarationProperties* decl_props, |
| int* num_decl, |
| bool* ok); |
| Statement ParseExpressionOrLabelledStatement(bool* ok); |
| Statement ParseIfStatement(bool* ok); |
| Statement ParseContinueStatement(bool* ok); |
| Statement ParseBreakStatement(bool* ok); |
| Statement ParseReturnStatement(bool* ok); |
| Statement ParseWithStatement(bool* ok); |
| Statement ParseSwitchStatement(bool* ok); |
| Statement ParseDoWhileStatement(bool* ok); |
| Statement ParseWhileStatement(bool* ok); |
| Statement ParseForStatement(bool* ok); |
| Statement ParseThrowStatement(bool* ok); |
| Statement ParseTryStatement(bool* ok); |
| Statement ParseDebuggerStatement(bool* ok); |
| |
| Expression ParseExpression(bool accept_IN, bool* ok); |
| Expression ParseAssignmentExpression(bool accept_IN, bool* ok); |
| Expression ParseConditionalExpression(bool accept_IN, bool* ok); |
| Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok); |
| Expression ParseUnaryExpression(bool* ok); |
| Expression ParsePostfixExpression(bool* ok); |
| Expression ParseLeftHandSideExpression(bool* ok); |
| Expression ParseNewExpression(bool* ok); |
| Expression ParseMemberExpression(bool* ok); |
| Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok); |
| Expression ParsePrimaryExpression(bool* ok); |
| Expression ParseArrayLiteral(bool* ok); |
| Expression ParseObjectLiteral(bool* ok); |
| Expression ParseRegExpLiteral(bool seen_equal, bool* ok); |
| Expression ParseV8Intrinsic(bool* ok); |
| |
| Arguments ParseArguments(bool* ok); |
| Expression ParseFunctionLiteral(bool* ok); |
| void ParseLazyFunctionLiteralBody(bool* ok); |
| |
| Identifier ParseIdentifier(bool* ok); |
| Identifier ParseIdentifierName(bool* ok); |
| Identifier ParseIdentifierNameOrGetOrSet(bool* is_get, |
| bool* is_set, |
| bool* ok); |
| |
| // Logs the currently parsed literal as a symbol in the preparser data. |
| void LogSymbol(); |
| // Log the currently parsed identifier. |
| Identifier GetIdentifierSymbol(); |
| // Log the currently parsed string literal. |
| Expression GetStringSymbol(); |
| |
| i::Token::Value peek() { |
| if (stack_overflow_) return i::Token::ILLEGAL; |
| return scanner_->peek(); |
| } |
| |
| i::Token::Value Next() { |
| if (stack_overflow_) return i::Token::ILLEGAL; |
| { |
| int marker; |
| if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) { |
| // Further calls to peek/Next will return illegal token. |
| // The current one will still be returned. It might already |
| // have been seen using peek. |
| stack_overflow_ = true; |
| } |
| } |
| return scanner_->Next(); |
| } |
| |
| bool peek_any_identifier(); |
| |
| void set_language_mode(i::LanguageMode language_mode) { |
| scope_->set_language_mode(language_mode); |
| } |
| |
| bool is_classic_mode() { |
| return scope_->language_mode() == i::CLASSIC_MODE; |
| } |
| |
| bool is_extended_mode() { |
| return scope_->language_mode() == i::EXTENDED_MODE; |
| } |
| |
| i::LanguageMode language_mode() { return scope_->language_mode(); } |
| |
| void Consume(i::Token::Value token) { Next(); } |
| |
| void Expect(i::Token::Value token, bool* ok) { |
| if (Next() != token) { |
| *ok = false; |
| } |
| } |
| |
| bool Check(i::Token::Value token) { |
| i::Token::Value next = peek(); |
| if (next == token) { |
| Consume(next); |
| return true; |
| } |
| return false; |
| } |
| void ExpectSemicolon(bool* ok); |
| |
| static int Precedence(i::Token::Value tok, bool accept_IN); |
| |
| void SetStrictModeViolation(i::Scanner::Location, |
| const char* type, |
| bool* ok); |
| |
| void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok); |
| |
| void StrictModeIdentifierViolation(i::Scanner::Location, |
| const char* eval_args_type, |
| Identifier identifier, |
| bool* ok); |
| |
| i::Scanner* scanner_; |
| i::ParserRecorder* log_; |
| Scope* scope_; |
| uintptr_t stack_limit_; |
| i::Scanner::Location strict_mode_violation_location_; |
| const char* strict_mode_violation_type_; |
| bool stack_overflow_; |
| bool allow_lazy_; |
| bool allow_modules_; |
| bool allow_natives_syntax_; |
| bool parenthesized_function_; |
| bool harmony_scoping_; |
| }; |
| } } // v8::preparser |
| |
| #endif // V8_PREPARSER_H |