| /* |
| * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| * Copyright (C) 2011 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "config.h" |
| #include "HTMLTreeBuilder.h" |
| |
| #include "Comment.h" |
| #include "DocumentFragment.h" |
| #include "DocumentType.h" |
| #include "Element.h" |
| #include "Frame.h" |
| #include "HTMLDocument.h" |
| #include "HTMLElementFactory.h" |
| #include "HTMLFormElement.h" |
| #include "HTMLHtmlElement.h" |
| #include "HTMLNames.h" |
| #include "HTMLScriptElement.h" |
| #include "HTMLToken.h" |
| #include "HTMLTokenizer.h" |
| #include "LocalizedStrings.h" |
| #if ENABLE(MATHML) |
| #include "MathMLNames.h" |
| #endif |
| #include "NotImplemented.h" |
| #if ENABLE(SVG) |
| #include "SVGNames.h" |
| #endif |
| #include "ScriptController.h" |
| #include "Settings.h" |
| #include "Text.h" |
| #include <wtf/UnusedParam.h> |
| |
| namespace WebCore { |
| |
| using namespace HTMLNames; |
| |
| namespace { |
| |
| bool hasImpliedEndTag(ContainerNode* node) |
| { |
| return node->hasTagName(ddTag) |
| || node->hasTagName(dtTag) |
| || node->hasTagName(liTag) |
| || node->hasTagName(optionTag) |
| || node->hasTagName(optgroupTag) |
| || node->hasTagName(pTag) |
| || node->hasTagName(rpTag) |
| || node->hasTagName(rtTag); |
| } |
| |
| bool causesFosterParenting(const QualifiedName& tagName) |
| { |
| return tagName == tableTag |
| || tagName == tbodyTag |
| || tagName == tfootTag |
| || tagName == theadTag |
| || tagName == trTag; |
| } |
| |
| } // namespace |
| |
| template<typename ChildType> |
| PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* rawParent, PassRefPtr<ChildType> prpChild) |
| { |
| RefPtr<ChildType> child = prpChild; |
| RefPtr<ContainerNode> parent = rawParent; |
| |
| // FIXME: It's confusing that HTMLConstructionSite::attach does the magic |
| // redirection to the foster parent but HTMLConstructionSite::attachAtSite |
| // doesn't. It feels like we're missing a concept somehow. |
| if (shouldFosterParent()) { |
| fosterParent(child.get()); |
| ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached()); |
| return child.release(); |
| } |
| |
| parent->parserAddChild(child); |
| |
| // An event handler (DOM Mutation, beforeload, et al.) could have removed |
| // the child, in which case we shouldn't try attaching it. |
| if (!child->parentNode()) |
| return child.release(); |
| |
| if (parent->attached() && !child->attached()) |
| child->attach(); |
| return child.release(); |
| } |
| |
| void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild) |
| { |
| // FIXME: It's unfortunate that we need to hold a reference to child |
| // here to call attach(). We should investigate whether we can rely on |
| // |site.parent| to hold a ref at this point. |
| RefPtr<Node> child = prpChild; |
| |
| if (site.nextChild) |
| site.parent->parserInsertBefore(child, site.nextChild); |
| else |
| site.parent->parserAddChild(child); |
| |
| // JavaScript run from beforeload (or DOM Mutation or event handlers) |
| // might have removed the child, in which case we should not attach it. |
| if (child->parentNode() && site.parent->attached() && !child->attached()) |
| child->attach(); |
| } |
| |
| HTMLConstructionSite::HTMLConstructionSite(Document* document) |
| : m_document(document) |
| , m_attachmentRoot(document) |
| , m_fragmentScriptingPermission(FragmentScriptingAllowed) |
| , m_isParsingFragment(false) |
| , m_redirectAttachToFosterParent(false) |
| { |
| } |
| |
| HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission) |
| : m_document(fragment->document()) |
| , m_attachmentRoot(fragment) |
| , m_fragmentScriptingPermission(scriptingPermission) |
| , m_isParsingFragment(true) |
| , m_redirectAttachToFosterParent(false) |
| { |
| } |
| |
| HTMLConstructionSite::~HTMLConstructionSite() |
| { |
| } |
| |
| void HTMLConstructionSite::detach() |
| { |
| m_document = 0; |
| m_attachmentRoot = 0; |
| } |
| |
| void HTMLConstructionSite::setForm(HTMLFormElement* form) |
| { |
| // This method should only be needed for HTMLTreeBuilder in the fragment case. |
| ASSERT(!m_form); |
| m_form = form; |
| } |
| |
| PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm() |
| { |
| return m_form.release(); |
| } |
| |
| void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded() |
| { |
| ASSERT(m_document); |
| if (m_document->frame() && !m_isParsingFragment) |
| m_document->frame()->loader()->dispatchDocumentElementAvailable(); |
| } |
| |
| void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token) |
| { |
| RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document); |
| element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); |
| m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get())); |
| #if ENABLE(OFFLINE_WEB_APPLICATIONS) |
| element->insertedByParser(); |
| #endif |
| dispatchDocumentElementAvailableIfNeeded(); |
| } |
| |
| void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element) |
| { |
| if (!token.attributes()) |
| return; |
| |
| NamedNodeMap* attributes = element->attributes(false); |
| for (unsigned i = 0; i < token.attributes()->length(); ++i) { |
| Attribute* attribute = token.attributes()->attributeItem(i); |
| if (!attributes->getAttributeItem(attribute->name())) |
| element->setAttribute(attribute->name(), attribute->value()); |
| } |
| } |
| |
| void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token) |
| { |
| // FIXME: parse error |
| |
| // Fragments do not have a root HTML element, so any additional HTML elements |
| // encountered during fragment parsing should be ignored. |
| if (m_isParsingFragment) |
| return; |
| |
| mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement()); |
| } |
| |
| void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token) |
| { |
| // FIXME: parse error |
| mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement()); |
| } |
| |
| void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token) |
| { |
| ASSERT(token.type() == HTMLToken::DOCTYPE); |
| attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier()))); |
| |
| // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which |
| // never occurs. However, if we ever chose to support such, this code is subtly wrong, |
| // because context-less fragments can determine their own quirks mode, and thus change |
| // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code |
| // in a fragment, as changing the owning document's compatibility mode would be wrong. |
| ASSERT(!m_isParsingFragment); |
| if (m_isParsingFragment) |
| return; |
| |
| if (token.forceQuirks()) |
| m_document->setCompatibilityMode(Document::QuirksMode); |
| else |
| m_document->setCompatibilityModeFromDoctype(); |
| } |
| |
| void HTMLConstructionSite::insertComment(AtomicHTMLToken& token) |
| { |
| ASSERT(token.type() == HTMLToken::Comment); |
| attach(currentNode(), Comment::create(currentNode()->document(), token.comment())); |
| } |
| |
| void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token) |
| { |
| ASSERT(token.type() == HTMLToken::Comment); |
| attach(m_attachmentRoot, Comment::create(m_document, token.comment())); |
| } |
| |
| void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token) |
| { |
| ASSERT(token.type() == HTMLToken::Comment); |
| ContainerNode* parent = m_openElements.rootNode(); |
| attach(parent, Comment::create(parent->document(), token.comment())); |
| } |
| |
| PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child) |
| { |
| return attach(currentNode(), child); |
| } |
| |
| void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token) |
| { |
| ASSERT(!shouldFosterParent()); |
| m_head = attachToCurrent(createHTMLElement(token)); |
| m_openElements.pushHTMLHeadElement(m_head); |
| } |
| |
| void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token) |
| { |
| ASSERT(!shouldFosterParent()); |
| m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token))); |
| } |
| |
| void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted) |
| { |
| RefPtr<Element> element = createHTMLElement(token); |
| ASSERT(element->hasTagName(formTag)); |
| RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release()); |
| form->setDemoted(isDemoted); |
| m_openElements.push(attachToCurrent(form.release())); |
| ASSERT(currentElement()->isHTMLElement()); |
| ASSERT(currentElement()->hasTagName(formTag)); |
| m_form = static_cast<HTMLFormElement*>(currentElement()); |
| } |
| |
| void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token) |
| { |
| m_openElements.push(attachToCurrent(createHTMLElement(token))); |
| } |
| |
| void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token) |
| { |
| ASSERT(token.type() == HTMLToken::StartTag); |
| RefPtr<Element> element = attachToCurrent(createHTMLElement(token)); |
| // Normally HTMLElementStack is responsible for calling finishParsingChildren, |
| // but self-closing elements are never in the element stack so the stack |
| // doesn't get a chance to tell them that we're done parsing their children. |
| element->finishParsingChildren(); |
| // FIXME: Do we want to acknowledge the token's self-closing flag? |
| // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag |
| } |
| |
| void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token) |
| { |
| // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements |
| // Possible active formatting elements include: |
| // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u. |
| insertHTMLElement(token); |
| m_activeFormattingElements.append(currentElement()); |
| } |
| |
| void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token) |
| { |
| RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true); |
| if (m_fragmentScriptingPermission == FragmentScriptingAllowed) |
| element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); |
| m_openElements.push(attachToCurrent(element.release())); |
| } |
| |
| void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI) |
| { |
| ASSERT(token.type() == HTMLToken::StartTag); |
| notImplemented(); // parseError when xmlns or xmlns:xlink are wrong. |
| |
| RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI)); |
| if (!token.selfClosing()) |
| m_openElements.push(element); |
| } |
| |
| void HTMLConstructionSite::insertTextNode(const String& characters) |
| { |
| AttachmentSite site; |
| site.parent = currentNode(); |
| site.nextChild = 0; |
| if (shouldFosterParent()) |
| findFosterSite(site); |
| |
| unsigned currentPosition = 0; |
| |
| // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary |
| // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>. |
| |
| Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild(); |
| if (previousChild && previousChild->isTextNode()) { |
| // FIXME: We're only supposed to append to this text node if it |
| // was the last text node inserted by the parser. |
| CharacterData* textNode = static_cast<CharacterData*>(previousChild); |
| currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit); |
| } |
| |
| while (currentPosition < characters.length()) { |
| RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition); |
| // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil. |
| if (!textNode->length()) |
| textNode = Text::create(site.parent->document(), characters.substring(currentPosition)); |
| |
| currentPosition += textNode->length(); |
| ASSERT(currentPosition <= characters.length()); |
| attachAtSite(site, textNode.release()); |
| } |
| } |
| |
| PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI) |
| { |
| QualifiedName tagName(nullAtom, token.name(), namespaceURI); |
| RefPtr<Element> element = currentNode()->document()->createElement(tagName, true); |
| element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); |
| return element.release(); |
| } |
| |
| PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token) |
| { |
| QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI); |
| // FIXME: This can't use HTMLConstructionSite::createElement because we |
| // have to pass the current form element. We should rework form association |
| // to occur after construction to allow better code sharing here. |
| RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true); |
| element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission); |
| ASSERT(element->isHTMLElement()); |
| return element.release(); |
| } |
| |
| PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record) |
| { |
| return createHTMLElementFromSavedElement(record->element()); |
| } |
| |
| namespace { |
| |
| PassRefPtr<NamedNodeMap> cloneAttributes(Element* element) |
| { |
| NamedNodeMap* attributes = element->attributes(true); |
| if (!attributes) |
| return 0; |
| |
| RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create(); |
| for (size_t i = 0; i < attributes->length(); ++i) { |
| Attribute* attribute = attributes->attributeItem(i); |
| RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value()); |
| newAttributes->addAttribute(clone); |
| } |
| return newAttributes.release(); |
| } |
| |
| } |
| |
| PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element) |
| { |
| // FIXME: This method is wrong. We should be using the original token. |
| // Using an Element* causes us to fail examples like this: |
| // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b> |
| // When reconstructTheActiveFormattingElements calls this method to open |
| // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5 |
| // spec implies it should be "1". Minefield matches the HTML5 spec here. |
| |
| ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong. |
| AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element)); |
| return createHTMLElement(fakeToken); |
| } |
| |
| bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const |
| { |
| if (m_activeFormattingElements.isEmpty()) |
| return false; |
| unsigned index = m_activeFormattingElements.size(); |
| do { |
| --index; |
| const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index); |
| if (entry.isMarker() || m_openElements.contains(entry.element())) { |
| firstUnopenElementIndex = index + 1; |
| return firstUnopenElementIndex < m_activeFormattingElements.size(); |
| } |
| } while (index); |
| firstUnopenElementIndex = index; |
| return true; |
| } |
| |
| void HTMLConstructionSite::reconstructTheActiveFormattingElements() |
| { |
| unsigned firstUnopenElementIndex; |
| if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex)) |
| return; |
| |
| unsigned unopenEntryIndex = firstUnopenElementIndex; |
| ASSERT(unopenEntryIndex < m_activeFormattingElements.size()); |
| for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) { |
| HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex); |
| RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element()); |
| m_openElements.push(attachToCurrent(reconstructed.release())); |
| unopenedEntry.replaceElement(currentElement()); |
| } |
| } |
| |
| void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName) |
| { |
| while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName)) |
| m_openElements.pop(); |
| } |
| |
| void HTMLConstructionSite::generateImpliedEndTags() |
| { |
| while (hasImpliedEndTag(currentNode())) |
| m_openElements.pop(); |
| } |
| |
| void HTMLConstructionSite::findFosterSite(AttachmentSite& site) |
| { |
| HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName()); |
| if (lastTableElementRecord) { |
| Element* lastTableElement = lastTableElementRecord->element(); |
| if (ContainerNode* parent = lastTableElement->parentNode()) { |
| site.parent = parent; |
| site.nextChild = lastTableElement; |
| return; |
| } |
| site.parent = lastTableElementRecord->next()->element(); |
| site.nextChild = 0; |
| return; |
| } |
| // Fragment case |
| site.parent = m_openElements.rootNode(); // DocumentFragment |
| site.nextChild = 0; |
| } |
| |
| bool HTMLConstructionSite::shouldFosterParent() const |
| { |
| return m_redirectAttachToFosterParent |
| && currentNode()->isElementNode() |
| && causesFosterParenting(currentElement()->tagQName()); |
| } |
| |
| void HTMLConstructionSite::fosterParent(Node* node) |
| { |
| AttachmentSite site; |
| findFosterSite(site); |
| attachAtSite(site, node); |
| } |
| |
| } |