| /* |
| * Copyright (C) 2008 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #ifndef Parser_h |
| #define Parser_h |
| |
| #include <wtf/Platform.h> |
| |
| #if ENABLE(WREC) |
| |
| #include "Escapes.h" |
| #include "Quantifier.h" |
| #include "UString.h" |
| #include "WRECGenerator.h" |
| #include <wtf/ASCIICType.h> |
| |
| namespace JSC { namespace WREC { |
| |
| struct CharacterClass; |
| |
| class Parser { |
| typedef Generator::JumpList JumpList; |
| typedef Generator::ParenthesesType ParenthesesType; |
| |
| friend class SavedState; |
| |
| public: |
| Parser(const UString& pattern, bool ignoreCase, bool multiline) |
| : m_generator(*this) |
| , m_data(pattern.data()) |
| , m_size(pattern.size()) |
| , m_ignoreCase(ignoreCase) |
| , m_multiline(multiline) |
| { |
| reset(); |
| } |
| |
| Generator& generator() { return m_generator; } |
| |
| bool ignoreCase() const { return m_ignoreCase; } |
| bool multiline() const { return m_multiline; } |
| |
| void recordSubpattern() { ++m_numSubpatterns; } |
| unsigned numSubpatterns() const { return m_numSubpatterns; } |
| |
| const char* error() const { return m_error; } |
| const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; } |
| |
| void parsePattern(JumpList& failures) |
| { |
| reset(); |
| |
| parseDisjunction(failures); |
| |
| if (peek() != EndOfPattern) |
| setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it. |
| } |
| |
| void parseDisjunction(JumpList& failures); |
| void parseAlternative(JumpList& failures); |
| bool parseTerm(JumpList& failures); |
| bool parseNonCharacterEscape(JumpList& failures, const Escape&); |
| bool parseParentheses(JumpList& failures); |
| bool parseCharacterClass(JumpList& failures); |
| bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert); |
| bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId); |
| |
| private: |
| class SavedState { |
| public: |
| SavedState(Parser& parser) |
| : m_parser(parser) |
| , m_index(parser.m_index) |
| { |
| } |
| |
| void restore() |
| { |
| m_parser.m_index = m_index; |
| } |
| |
| private: |
| Parser& m_parser; |
| unsigned m_index; |
| }; |
| |
| void reset() |
| { |
| m_index = 0; |
| m_numSubpatterns = 0; |
| m_error = 0; |
| } |
| |
| void setError(const char* error) |
| { |
| if (m_error) |
| return; |
| m_error = error; |
| } |
| |
| int peek() |
| { |
| if (m_index >= m_size) |
| return EndOfPattern; |
| return m_data[m_index]; |
| } |
| |
| int consume() |
| { |
| if (m_index >= m_size) |
| return EndOfPattern; |
| return m_data[m_index++]; |
| } |
| |
| bool peekIsDigit() |
| { |
| return WTF::isASCIIDigit(peek()); |
| } |
| |
| unsigned peekDigit() |
| { |
| ASSERT(peekIsDigit()); |
| return peek() - '0'; |
| } |
| |
| unsigned consumeDigit() |
| { |
| ASSERT(peekIsDigit()); |
| return consume() - '0'; |
| } |
| |
| unsigned consumeNumber() |
| { |
| int n = consumeDigit(); |
| while (peekIsDigit()) { |
| n *= 10; |
| n += consumeDigit(); |
| } |
| return n; |
| } |
| |
| int consumeHex(int count) |
| { |
| int n = 0; |
| while (count--) { |
| if (!WTF::isASCIIHexDigit(peek())) |
| return -1; |
| n = (n << 4) | WTF::toASCIIHexValue(consume()); |
| } |
| return n; |
| } |
| |
| unsigned consumeOctal() |
| { |
| unsigned n = 0; |
| while (n < 32 && WTF::isASCIIOctalDigit(peek())) |
| n = n * 8 + consumeDigit(); |
| return n; |
| } |
| |
| ALWAYS_INLINE Quantifier consumeGreedyQuantifier(); |
| Quantifier consumeQuantifier(); |
| Escape consumeEscape(bool inCharacterClass); |
| ParenthesesType consumeParenthesesType(); |
| |
| static const int EndOfPattern = -1; |
| |
| // Error messages. |
| static const char* QuantifierOutOfOrder; |
| static const char* QuantifierWithoutAtom; |
| static const char* ParenthesesUnmatched; |
| static const char* ParenthesesTypeInvalid; |
| static const char* ParenthesesNotSupported; |
| static const char* CharacterClassUnmatched; |
| static const char* CharacterClassOutOfOrder; |
| static const char* EscapeUnterminated; |
| |
| Generator m_generator; |
| const UChar* m_data; |
| unsigned m_size; |
| unsigned m_index; |
| bool m_ignoreCase; |
| bool m_multiline; |
| unsigned m_numSubpatterns; |
| const char* m_error; |
| }; |
| |
| } } // namespace JSC::WREC |
| |
| #endif // ENABLE(WREC) |
| |
| #endif // Parser_h |