| /* |
| * Copyright 2005 Maksim Orlovich <maksim@kde.org> |
| * Copyright (C) 2006 Apple Computer, Inc. |
| * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org> |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "XPathParser.h" |
| |
| #if ENABLE(XPATH) |
| |
| #include "ExceptionCode.h" |
| #include "XPathEvaluator.h" |
| #include "XPathException.h" |
| #include "XPathNSResolver.h" |
| #include "XPathStep.h" |
| #include <wtf/StdLibExtras.h> |
| #include <wtf/text/StringHash.h> |
| |
| int xpathyyparse(void*); |
| |
| using namespace WTF; |
| using namespace Unicode; |
| |
| namespace WebCore { |
| namespace XPath { |
| |
| class LocationPath; |
| |
| #include "XPathGrammar.h" |
| |
| Parser* Parser::currentParser = 0; |
| |
| enum XMLCat { NameStart, NameCont, NotPartOfName }; |
| |
| typedef HashMap<String, Step::Axis> AxisNamesMap; |
| |
| static XMLCat charCat(UChar aChar) |
| { |
| //### might need to add some special cases from the XML spec. |
| |
| if (aChar == '_') |
| return NameStart; |
| |
| if (aChar == '.' || aChar == '-') |
| return NameCont; |
| CharCategory category = Unicode::category(aChar); |
| if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter)) |
| return NameStart; |
| if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit)) |
| return NameCont; |
| return NotPartOfName; |
| } |
| |
| static void setUpAxisNamesMap(AxisNamesMap& axisNames) |
| { |
| struct AxisName { |
| const char* name; |
| Step::Axis axis; |
| }; |
| const AxisName axisNameList[] = { |
| { "ancestor", Step::AncestorAxis }, |
| { "ancestor-or-self", Step::AncestorOrSelfAxis }, |
| { "attribute", Step::AttributeAxis }, |
| { "child", Step::ChildAxis }, |
| { "descendant", Step::DescendantAxis }, |
| { "descendant-or-self", Step::DescendantOrSelfAxis }, |
| { "following", Step::FollowingAxis }, |
| { "following-sibling", Step::FollowingSiblingAxis }, |
| { "namespace", Step::NamespaceAxis }, |
| { "parent", Step::ParentAxis }, |
| { "preceding", Step::PrecedingAxis }, |
| { "preceding-sibling", Step::PrecedingSiblingAxis }, |
| { "self", Step::SelfAxis } |
| }; |
| for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i) |
| axisNames.set(axisNameList[i].name, axisNameList[i].axis); |
| } |
| |
| static bool isAxisName(const String& name, Step::Axis& type) |
| { |
| DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ()); |
| |
| if (axisNames.isEmpty()) |
| setUpAxisNamesMap(axisNames); |
| |
| AxisNamesMap::iterator it = axisNames.find(name); |
| if (it == axisNames.end()) |
| return false; |
| type = it->second; |
| return true; |
| } |
| |
| static bool isNodeTypeName(const String& name) |
| { |
| DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ()); |
| if (nodeTypeNames.isEmpty()) { |
| nodeTypeNames.add("comment"); |
| nodeTypeNames.add("text"); |
| nodeTypeNames.add("processing-instruction"); |
| nodeTypeNames.add("node"); |
| } |
| return nodeTypeNames.contains(name); |
| } |
| |
| // Returns whether the current token can possibly be a binary operator, given |
| // the previous token. Necessary to disambiguate some of the operators |
| // (* (multiply), div, and, or, mod) in the [32] Operator rule |
| // (check http://www.w3.org/TR/xpath#exprlex). |
| bool Parser::isBinaryOperatorContext() const |
| { |
| switch (m_lastTokenType) { |
| case 0: |
| case '@': case AXISNAME: case '(': case '[': case ',': |
| case AND: case OR: case MULOP: |
| case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: |
| case EQOP: case RELOP: |
| return false; |
| default: |
| return true; |
| } |
| } |
| |
| void Parser::skipWS() |
| { |
| while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) |
| ++m_nextPos; |
| } |
| |
| Token Parser::makeTokenAndAdvance(int code, int advance) |
| { |
| m_nextPos += advance; |
| return Token(code); |
| } |
| |
| Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) |
| { |
| m_nextPos += advance; |
| return Token(code, val); |
| } |
| |
| Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) |
| { |
| m_nextPos += advance; |
| return Token(code, val); |
| } |
| |
| // Returns next char if it's there and interesting, 0 otherwise |
| char Parser::peekAheadHelper() |
| { |
| if (m_nextPos + 1 >= m_data.length()) |
| return 0; |
| UChar next = m_data[m_nextPos + 1]; |
| if (next >= 0xff) |
| return 0; |
| return next; |
| } |
| |
| char Parser::peekCurHelper() |
| { |
| if (m_nextPos >= m_data.length()) |
| return 0; |
| UChar next = m_data[m_nextPos]; |
| if (next >= 0xff) |
| return 0; |
| return next; |
| } |
| |
| Token Parser::lexString() |
| { |
| UChar delimiter = m_data[m_nextPos]; |
| int startPos = m_nextPos + 1; |
| |
| for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { |
| if (m_data[m_nextPos] == delimiter) { |
| String value = m_data.substring(startPos, m_nextPos - startPos); |
| if (value.isNull()) |
| value = ""; |
| ++m_nextPos; // Consume the char. |
| return Token(LITERAL, value); |
| } |
| } |
| |
| // Ouch, went off the end -- report error. |
| return Token(XPATH_ERROR); |
| } |
| |
| Token Parser::lexNumber() |
| { |
| int startPos = m_nextPos; |
| bool seenDot = false; |
| |
| // Go until end or a non-digits character. |
| for (; m_nextPos < m_data.length(); ++m_nextPos) { |
| UChar aChar = m_data[m_nextPos]; |
| if (aChar >= 0xff) break; |
| |
| if (aChar < '0' || aChar > '9') { |
| if (aChar == '.' && !seenDot) |
| seenDot = true; |
| else |
| break; |
| } |
| } |
| |
| return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); |
| } |
| |
| bool Parser::lexNCName(String& name) |
| { |
| int startPos = m_nextPos; |
| if (m_nextPos >= m_data.length()) |
| return false; |
| |
| if (charCat(m_data[m_nextPos]) != NameStart) |
| return false; |
| |
| // Keep going until we get a character that's not good for names. |
| for (; m_nextPos < m_data.length(); ++m_nextPos) |
| if (charCat(m_data[m_nextPos]) == NotPartOfName) |
| break; |
| |
| name = m_data.substring(startPos, m_nextPos - startPos); |
| return true; |
| } |
| |
| bool Parser::lexQName(String& name) |
| { |
| String n1; |
| if (!lexNCName(n1)) |
| return false; |
| |
| skipWS(); |
| |
| // If the next character is :, what we just got it the prefix, if not, |
| // it's the whole thing. |
| if (peekAheadHelper() != ':') { |
| name = n1; |
| return true; |
| } |
| |
| String n2; |
| if (!lexNCName(n2)) |
| return false; |
| |
| name = n1 + ":" + n2; |
| return true; |
| } |
| |
| Token Parser::nextTokenInternal() |
| { |
| skipWS(); |
| |
| if (m_nextPos >= m_data.length()) |
| return Token(0); |
| |
| char code = peekCurHelper(); |
| switch (code) { |
| case '(': case ')': case '[': case ']': |
| case '@': case ',': case '|': |
| return makeTokenAndAdvance(code); |
| case '\'': |
| case '\"': |
| return lexString(); |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| return lexNumber(); |
| case '.': { |
| char next = peekAheadHelper(); |
| if (next == '.') |
| return makeTokenAndAdvance(DOTDOT, 2); |
| if (next >= '0' && next <= '9') |
| return lexNumber(); |
| return makeTokenAndAdvance('.'); |
| } |
| case '/': |
| if (peekAheadHelper() == '/') |
| return makeTokenAndAdvance(SLASHSLASH, 2); |
| return makeTokenAndAdvance('/'); |
| case '+': |
| return makeTokenAndAdvance(PLUS); |
| case '-': |
| return makeTokenAndAdvance(MINUS); |
| case '=': |
| return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); |
| case '!': |
| if (peekAheadHelper() == '=') |
| return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); |
| return Token(XPATH_ERROR); |
| case '<': |
| if (peekAheadHelper() == '=') |
| return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); |
| return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); |
| case '>': |
| if (peekAheadHelper() == '=') |
| return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); |
| return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); |
| case '*': |
| if (isBinaryOperatorContext()) |
| return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); |
| ++m_nextPos; |
| return Token(NAMETEST, "*"); |
| case '$': { // $ QName |
| m_nextPos++; |
| String name; |
| if (!lexQName(name)) |
| return Token(XPATH_ERROR); |
| return Token(VARIABLEREFERENCE, name); |
| } |
| } |
| |
| String name; |
| if (!lexNCName(name)) |
| return Token(XPATH_ERROR); |
| |
| skipWS(); |
| // If we're in an operator context, check for any operator names |
| if (isBinaryOperatorContext()) { |
| if (name == "and") //### hash? |
| return Token(AND); |
| if (name == "or") |
| return Token(OR); |
| if (name == "mod") |
| return Token(MULOP, NumericOp::OP_Mod); |
| if (name == "div") |
| return Token(MULOP, NumericOp::OP_Div); |
| } |
| |
| // See whether we are at a : |
| if (peekCurHelper() == ':') { |
| m_nextPos++; |
| // Any chance it's an axis name? |
| if (peekCurHelper() == ':') { |
| m_nextPos++; |
| |
| //It might be an axis name. |
| Step::Axis axis; |
| if (isAxisName(name, axis)) |
| return Token(AXISNAME, axis); |
| // Ugh, :: is only valid in axis names -> error |
| return Token(XPATH_ERROR); |
| } |
| |
| // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest |
| skipWS(); |
| if (peekCurHelper() == '*') { |
| m_nextPos++; |
| return Token(NAMETEST, name + ":*"); |
| } |
| |
| // Make a full qname. |
| String n2; |
| if (!lexNCName(n2)) |
| return Token(XPATH_ERROR); |
| |
| name = name + ":" + n2; |
| } |
| |
| skipWS(); |
| if (peekCurHelper() == '(') { |
| //note: we don't swallow the (here! |
| |
| //either node type of function name |
| if (isNodeTypeName(name)) { |
| if (name == "processing-instruction") |
| return Token(PI, name); |
| |
| return Token(NODETYPE, name); |
| } |
| //must be a function name. |
| return Token(FUNCTIONNAME, name); |
| } |
| |
| // At this point, it must be NAMETEST. |
| return Token(NAMETEST, name); |
| } |
| |
| Token Parser::nextToken() |
| { |
| Token toRet = nextTokenInternal(); |
| m_lastTokenType = toRet.type; |
| return toRet; |
| } |
| |
| Parser::Parser() |
| { |
| reset(String()); |
| } |
| |
| Parser::~Parser() |
| { |
| } |
| |
| void Parser::reset(const String& data) |
| { |
| m_nextPos = 0; |
| m_data = data; |
| m_lastTokenType = 0; |
| |
| m_topExpr = 0; |
| m_gotNamespaceError = false; |
| } |
| |
| int Parser::lex(void* data) |
| { |
| YYSTYPE* yylval = static_cast<YYSTYPE*>(data); |
| Token tok = nextToken(); |
| |
| switch (tok.type) { |
| case AXISNAME: |
| yylval->axis = tok.axis; |
| break; |
| case MULOP: |
| yylval->numop = tok.numop; |
| break; |
| case RELOP: |
| case EQOP: |
| yylval->eqop = tok.eqop; |
| break; |
| case NODETYPE: |
| case PI: |
| case FUNCTIONNAME: |
| case LITERAL: |
| case VARIABLEREFERENCE: |
| case NUMBER: |
| case NAMETEST: |
| yylval->str = new String(tok.str); |
| registerString(yylval->str); |
| break; |
| } |
| |
| return tok.type; |
| } |
| |
| bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI) |
| { |
| size_t colon = qName.find(':'); |
| if (colon != notFound) { |
| if (!m_resolver) |
| return false; |
| namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon)); |
| if (namespaceURI.isNull()) |
| return false; |
| localName = qName.substring(colon + 1); |
| } else |
| localName = qName; |
| |
| return true; |
| } |
| |
| Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec) |
| { |
| reset(statement); |
| |
| m_resolver = resolver; |
| |
| Parser* oldParser = currentParser; |
| currentParser = this; |
| int parseError = xpathyyparse(this); |
| currentParser = oldParser; |
| |
| if (parseError) { |
| deleteAllValues(m_parseNodes); |
| m_parseNodes.clear(); |
| |
| HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end(); |
| for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) { |
| deleteAllValues(**it); |
| delete *it; |
| } |
| m_predicateVectors.clear(); |
| |
| HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end(); |
| for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) { |
| deleteAllValues(**it); |
| delete *it; |
| } |
| m_expressionVectors.clear(); |
| |
| deleteAllValues(m_strings); |
| m_strings.clear(); |
| |
| deleteAllValues(m_nodeTests); |
| m_nodeTests.clear(); |
| |
| m_topExpr = 0; |
| |
| if (m_gotNamespaceError) |
| ec = NAMESPACE_ERR; |
| else |
| ec = XPathException::INVALID_EXPRESSION_ERR; |
| return 0; |
| } |
| |
| ASSERT(m_parseNodes.size() == 1); |
| ASSERT(*m_parseNodes.begin() == m_topExpr); |
| ASSERT(m_expressionVectors.size() == 0); |
| ASSERT(m_predicateVectors.size() == 0); |
| ASSERT(m_strings.size() == 0); |
| ASSERT(m_nodeTests.size() == 0); |
| |
| m_parseNodes.clear(); |
| Expression* result = m_topExpr; |
| m_topExpr = 0; |
| |
| return result; |
| } |
| |
| void Parser::registerParseNode(ParseNode* node) |
| { |
| if (node == 0) |
| return; |
| |
| ASSERT(!m_parseNodes.contains(node)); |
| |
| m_parseNodes.add(node); |
| } |
| |
| void Parser::unregisterParseNode(ParseNode* node) |
| { |
| if (node == 0) |
| return; |
| |
| ASSERT(m_parseNodes.contains(node)); |
| |
| m_parseNodes.remove(node); |
| } |
| |
| void Parser::registerPredicateVector(Vector<Predicate*>* vector) |
| { |
| if (vector == 0) |
| return; |
| |
| ASSERT(!m_predicateVectors.contains(vector)); |
| |
| m_predicateVectors.add(vector); |
| } |
| |
| void Parser::deletePredicateVector(Vector<Predicate*>* vector) |
| { |
| if (vector == 0) |
| return; |
| |
| ASSERT(m_predicateVectors.contains(vector)); |
| |
| m_predicateVectors.remove(vector); |
| delete vector; |
| } |
| |
| |
| void Parser::registerExpressionVector(Vector<Expression*>* vector) |
| { |
| if (vector == 0) |
| return; |
| |
| ASSERT(!m_expressionVectors.contains(vector)); |
| |
| m_expressionVectors.add(vector); |
| } |
| |
| void Parser::deleteExpressionVector(Vector<Expression*>* vector) |
| { |
| if (vector == 0) |
| return; |
| |
| ASSERT(m_expressionVectors.contains(vector)); |
| |
| m_expressionVectors.remove(vector); |
| delete vector; |
| } |
| |
| void Parser::registerString(String* s) |
| { |
| if (s == 0) |
| return; |
| |
| ASSERT(!m_strings.contains(s)); |
| |
| m_strings.add(s); |
| } |
| |
| void Parser::deleteString(String* s) |
| { |
| if (s == 0) |
| return; |
| |
| ASSERT(m_strings.contains(s)); |
| |
| m_strings.remove(s); |
| delete s; |
| } |
| |
| void Parser::registerNodeTest(Step::NodeTest* t) |
| { |
| if (t == 0) |
| return; |
| |
| ASSERT(!m_nodeTests.contains(t)); |
| |
| m_nodeTests.add(t); |
| } |
| |
| void Parser::deleteNodeTest(Step::NodeTest* t) |
| { |
| if (t == 0) |
| return; |
| |
| ASSERT(m_nodeTests.contains(t)); |
| |
| m_nodeTests.remove(t); |
| delete t; |
| } |
| |
| } |
| } |
| |
| #endif // ENABLE(XPATH) |