| // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "base/compiler_specific.h" |
| #include "base/file_path.h" |
| #include "base/file_util.h" |
| #include "base/hash_tables.h" |
| #include "base/string_util.h" |
| #include "base/utf_string_conversions.h" |
| #include "net/base/net_util.h" |
| #include "net/url_request/url_request_context.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebCString.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebData.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebNode.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeCollection.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeList.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializer.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializerClient.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebVector.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
| #include "webkit/glue/dom_operations.h" |
| #include "webkit/glue/webkit_glue.h" |
| #include "webkit/tools/test_shell/simple_resource_loader_bridge.h" |
| #include "webkit/tools/test_shell/test_shell_test.h" |
| |
| using WebKit::WebCString; |
| using WebKit::WebData; |
| using WebKit::WebDocument; |
| using WebKit::WebElement; |
| using WebKit::WebFrame; |
| using WebKit::WebNode; |
| using WebKit::WebNodeCollection; |
| using WebKit::WebNodeList; |
| using WebKit::WebPageSerializer; |
| using WebKit::WebPageSerializerClient; |
| using WebKit::WebNode; |
| using WebKit::WebString; |
| using WebKit::WebURL; |
| using WebKit::WebView; |
| using WebKit::WebVector; |
| |
| namespace { |
| |
| // Iterate recursively over sub-frames to find one with with a given url. |
| WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) { |
| if (!web_view->mainFrame()) |
| return NULL; |
| |
| std::vector<WebFrame*> stack; |
| stack.push_back(web_view->mainFrame()); |
| |
| while (!stack.empty()) { |
| WebFrame* current_frame = stack.back(); |
| stack.pop_back(); |
| if (GURL(current_frame->url()) == url) |
| return current_frame; |
| WebNodeCollection all = current_frame->document().all(); |
| for (WebNode node = all.firstItem(); |
| !node.isNull(); node = all.nextItem()) { |
| if (!node.isElementNode()) |
| continue; |
| // Check frame tag and iframe tag |
| WebElement element = node.to<WebElement>(); |
| if (!element.hasTagName("frame") && !element.hasTagName("iframe")) |
| continue; |
| WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element); |
| if (sub_frame) |
| stack.push_back(sub_frame); |
| } |
| } |
| return NULL; |
| } |
| |
| class DomSerializerTests : public TestShellTest, |
| public WebPageSerializerClient { |
| public: |
| DomSerializerTests() |
| : local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { } |
| |
| // DomSerializerDelegate. |
| void didSerializeDataForFrame(const WebURL& frame_web_url, |
| const WebCString& data, |
| PageSerializationStatus status) { |
| |
| GURL frame_url(frame_web_url); |
| // If the all frames are finished saving, check all finish status |
| if (status == WebPageSerializerClient::AllFramesAreFinished) { |
| SerializationFinishStatusMap::iterator it = |
| serialization_finish_status_.begin(); |
| for (; it != serialization_finish_status_.end(); ++it) |
| ASSERT_TRUE(it->second); |
| serialized_ = true; |
| return; |
| } |
| |
| // Check finish status of current frame. |
| SerializationFinishStatusMap::iterator it = |
| serialization_finish_status_.find(frame_url.spec()); |
| // New frame, set initial status as false. |
| if (it == serialization_finish_status_.end()) |
| serialization_finish_status_[frame_url.spec()] = false; |
| |
| it = serialization_finish_status_.find(frame_url.spec()); |
| ASSERT_TRUE(it != serialization_finish_status_.end()); |
| // In process frame, finish status should be false. |
| ASSERT_FALSE(it->second); |
| |
| // Add data to corresponding frame's content. |
| serialized_frame_map_[frame_url.spec()] += data.data(); |
| |
| // Current frame is completed saving, change the finish status. |
| if (status == WebPageSerializerClient::CurrentFrameIsFinished) |
| it->second = true; |
| } |
| |
| bool HasSerializedFrame(const GURL& frame_url) { |
| return serialized_frame_map_.find(frame_url.spec()) != |
| serialized_frame_map_.end(); |
| } |
| |
| const std::string& GetSerializedContentForFrame( |
| const GURL& frame_url) { |
| return serialized_frame_map_[frame_url.spec()]; |
| } |
| |
| // Load web page according to specific URL. |
| void LoadPageFromURL(const GURL& page_url) { |
| // Load the test file. |
| test_shell_->ResetTestController(); |
| test_shell_->LoadURL(page_url); |
| test_shell_->WaitTestFinished(); |
| } |
| |
| // Load web page according to input content and relative URLs within |
| // the document. |
| void LoadContents(const std::string& contents, |
| const GURL& base_url, |
| const WebString encoding_info) { |
| test_shell_->ResetTestController(); |
| // If input encoding is empty, use UTF-8 as default encoding. |
| if (encoding_info.isEmpty()) { |
| test_shell_->webView()->mainFrame()->loadHTMLString(contents, base_url); |
| } else { |
| WebData data(contents.data(), contents.length()); |
| |
| // Do not use WebFrame.LoadHTMLString because it assumes that input |
| // html contents use UTF-8 encoding. |
| // TODO(darin): This should use WebFrame::loadData. |
| WebFrame* web_frame = |
| test_shell_->webView()->mainFrame(); |
| |
| ASSERT_TRUE(web_frame != NULL); |
| |
| web_frame->loadData(data, "text/html", encoding_info, base_url); |
| } |
| |
| test_shell_->WaitTestFinished(); |
| } |
| |
| // Serialize page DOM according to specific page URL. The parameter |
| // recursive_serialization indicates whether we will serialize all |
| // sub-frames. |
| void SerializeDomForURL(const GURL& page_url, |
| bool recursive_serialization) { |
| // Find corresponding WebFrame according to page_url. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), |
| page_url); |
| ASSERT_TRUE(web_frame != NULL); |
| // Add input file URl to links_. |
| links_.assign(&page_url,1); |
| // Add dummy file path to local_path_. |
| WebString file_path = webkit_glue::FilePathStringToWebString( |
| FILE_PATH_LITERAL("c:\\dummy.htm")); |
| local_paths_.assign(&file_path, 1); |
| // Start serializing DOM. |
| bool result = WebPageSerializer::serialize(web_frame, |
| recursive_serialization, |
| static_cast<WebPageSerializerClient*>(this), |
| links_, |
| local_paths_, |
| webkit_glue::FilePathToWebString(local_directory_name_)); |
| ASSERT_TRUE(result); |
| ASSERT_TRUE(serialized_); |
| } |
| |
| private: |
| // Map frame_url to corresponding serialized_content. |
| typedef base::hash_map<std::string, std::string> SerializedFrameContentMap; |
| SerializedFrameContentMap serialized_frame_map_; |
| // Map frame_url to corresponding status of serialization finish. |
| typedef base::hash_map<std::string, bool> SerializationFinishStatusMap; |
| SerializationFinishStatusMap serialization_finish_status_; |
| // Flag indicates whether the process of serializing DOM is finished or not. |
| bool serialized_; |
| // The links_ contain dummy original URLs of all saved links. |
| WebVector<WebURL> links_; |
| // The local_paths_ contain dummy corresponding local file paths of all saved |
| // links, which matched links_ one by one. |
| WebVector<WebString> local_paths_; |
| // The local_directory_name_ is dummy relative path of directory which |
| // contain all saved auxiliary files included all sub frames and resources. |
| const FilePath local_directory_name_; |
| |
| protected: |
| // testing::Test |
| virtual void SetUp() { |
| TestShellTest::SetUp(); |
| serialized_ = false; |
| } |
| |
| virtual void TearDown() { |
| TestShellTest::TearDown(); |
| } |
| }; |
| |
| // Helper function that test whether the first node in the doc is a doc type |
| // node. |
| bool HasDocType(const WebDocument& doc) { |
| WebNode node = doc.firstChild(); |
| if (node.isNull()) |
| return false; |
| return node.nodeType() == WebNode::DocumentTypeNode; |
| } |
| |
| // Helper function for checking whether input node is META tag. Return true |
| // means it is META element, otherwise return false. The parameter charset_info |
| // return actual charset info if the META tag has charset declaration. |
| bool IsMetaElement(const WebNode& node, std::string& charset_info) { |
| if (!node.isElementNode()) |
| return false; |
| const WebElement meta = node.toConst<WebElement>(); |
| if (!meta.hasTagName("meta")) |
| return false; |
| charset_info.erase(0, charset_info.length()); |
| // Check the META charset declaration. |
| WebString httpEquiv = meta.getAttribute("http-equiv"); |
| if (LowerCaseEqualsASCII(httpEquiv, "content-type")) { |
| std::string content = meta.getAttribute("content").utf8(); |
| int pos = content.find("charset", 0); |
| if (pos > -1) { |
| // Add a dummy charset declaration to charset_info, which indicates this |
| // META tag has charset declaration although we do not get correct value |
| // yet. |
| charset_info.append("has-charset-declaration"); |
| int remaining_length = content.length() - pos - 7; |
| if (!remaining_length) |
| return true; |
| int start_pos = pos + 7; |
| // Find "=" symbol. |
| while (remaining_length--) |
| if (content[start_pos++] == L'=') |
| break; |
| // Skip beginning space. |
| while (remaining_length) { |
| if (content[start_pos] > 0x0020) |
| break; |
| ++start_pos; |
| --remaining_length; |
| } |
| if (!remaining_length) |
| return true; |
| int end_pos = start_pos; |
| // Now we find out the start point of charset info. Search the end point. |
| while (remaining_length--) { |
| if (content[end_pos] <= 0x0020 || content[end_pos] == L';') |
| break; |
| ++end_pos; |
| } |
| // Get actual charset info. |
| charset_info = content.substr(start_pos, end_pos - start_pos); |
| return true; |
| } |
| } |
| return true; |
| } |
| |
| // If original contents have document type, the serialized contents also have |
| // document type. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithDocType) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("youtube_1.htm"); |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Load the test file. |
| LoadPageFromURL(file_url); |
| // Make sure original contents have document type. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); |
| ASSERT_TRUE(web_frame != NULL); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(HasDocType(doc)); |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| // Load the serialized contents. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| LoadContents(serialized_contents, file_url, |
| web_frame->encoding()); |
| // Make sure serialized contents still have document type. |
| web_frame = test_shell_->webView()->mainFrame(); |
| doc = web_frame->document(); |
| ASSERT_TRUE(HasDocType(doc)); |
| } |
| |
| // If original contents do not have document type, the serialized contents |
| // also do not have document type. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("youtube_2.htm"); |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Load the test file. |
| LoadPageFromURL(file_url); |
| // Make sure original contents do not have document type. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); |
| ASSERT_TRUE(web_frame != NULL); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(!HasDocType(doc)); |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| // Load the serialized contents. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| LoadContents(serialized_contents, file_url, |
| web_frame->encoding()); |
| // Make sure serialized contents do not have document type. |
| web_frame = test_shell_->webView()->mainFrame(); |
| doc = web_frame->document(); |
| ASSERT_TRUE(!HasDocType(doc)); |
| } |
| |
| // Serialize XML document which has all 5 built-in entities. After |
| // finishing serialization, the serialized contents should be same |
| // with original XML document. |
| TEST_F(DomSerializerTests, SerializeXMLDocWithBuiltInEntities) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("note.xml"); |
| // Read original contents for later comparison. |
| std::string original_contents; |
| ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &original_contents)); |
| // Get file URL. |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Load the test file. |
| LoadPageFromURL(file_url); |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| // Compare the serialized contents with original contents. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| ASSERT_EQ(original_contents, serialized_contents); |
| } |
| |
| // When serializing DOM, we add MOTW declaration before html tag. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("youtube_2.htm"); |
| // Read original contents for later comparison . |
| std::string original_contents; |
| ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &original_contents)); |
| // Get file URL. |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Make sure original contents does not have MOTW; |
| std::string motw_declaration = |
| WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
| ASSERT_FALSE(motw_declaration.empty()); |
| // The encoding of original contents is ISO-8859-1, so we convert the MOTW |
| // declaration to ASCII and search whether original contents has it or not. |
| ASSERT_TRUE(std::string::npos == |
| original_contents.find(motw_declaration)); |
| // Load the test file. |
| LoadPageFromURL(file_url); |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| // Make sure the serialized contents have MOTW ; |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| ASSERT_FALSE(std::string::npos == |
| serialized_contents.find(motw_declaration)); |
| } |
| |
| // When serializing DOM, we will add the META which have correct charset |
| // declaration as first child of HEAD element for resolving WebKit bug: |
| // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document |
| // does not have META charset declaration. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("youtube_1.htm"); |
| // Get file URL. |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Load the test file. |
| LoadPageFromURL(file_url); |
| |
| // Make sure there is no META charset declaration in original document. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); |
| ASSERT_TRUE(web_frame != NULL); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| WebElement head_element = doc.head(); |
| ASSERT_TRUE(!head_element.isNull()); |
| // Go through all children of HEAD element. |
| for (WebNode child = head_element.firstChild(); !child.isNull(); |
| child = child.nextSibling()) { |
| std::string charset_info; |
| if (IsMetaElement(child, charset_info)) |
| ASSERT_TRUE(charset_info.empty()); |
| } |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| |
| // Load the serialized contents. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| LoadContents(serialized_contents, file_url, |
| web_frame->encoding()); |
| // Make sure the first child of HEAD element is META which has charset |
| // declaration in serialized contents. |
| web_frame = test_shell_->webView()->mainFrame(); |
| ASSERT_TRUE(web_frame != NULL); |
| doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| head_element = doc.head(); |
| ASSERT_TRUE(!head_element.isNull()); |
| WebNode meta_node = head_element.firstChild(); |
| ASSERT_TRUE(!meta_node.isNull()); |
| // Get meta charset info. |
| std::string charset_info2; |
| ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); |
| ASSERT_TRUE(!charset_info2.empty()); |
| ASSERT_TRUE(charset_info2 == std::string(web_frame->encoding().utf8())); |
| |
| // Make sure no more additional META tags which have charset declaration. |
| for (WebNode child = meta_node.nextSibling(); !child.isNull(); |
| child = child.nextSibling()) { |
| std::string charset_info; |
| if (IsMetaElement(child, charset_info)) |
| ASSERT_TRUE(charset_info.empty()); |
| } |
| } |
| |
| // When serializing DOM, if the original document has multiple META charset |
| // declaration, we will add the META which have correct charset declaration |
| // as first child of HEAD element and remove all original META charset |
| // declarations. |
| TEST_F(DomSerializerTests, |
| SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("youtube_2.htm"); |
| // Get file URL. |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Load the test file. |
| LoadPageFromURL(file_url); |
| |
| // Make sure there are multiple META charset declarations in original |
| // document. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); |
| ASSERT_TRUE(web_frame != NULL); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| WebElement head_ele = doc.head(); |
| ASSERT_TRUE(!head_ele.isNull()); |
| // Go through all children of HEAD element. |
| int charset_declaration_count = 0; |
| for (WebNode child = head_ele.firstChild(); !child.isNull(); |
| child = child.nextSibling()) { |
| std::string charset_info; |
| if (IsMetaElement(child, charset_info) && !charset_info.empty()) |
| charset_declaration_count++; |
| } |
| // The original doc has more than META tags which have charset declaration. |
| ASSERT_TRUE(charset_declaration_count > 1); |
| |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| |
| // Load the serialized contents. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| LoadContents(serialized_contents, file_url, |
| web_frame->encoding()); |
| // Make sure only first child of HEAD element is META which has charset |
| // declaration in serialized contents. |
| web_frame = test_shell_->webView()->mainFrame(); |
| ASSERT_TRUE(web_frame != NULL); |
| doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| head_ele = doc.head(); |
| ASSERT_TRUE(!head_ele.isNull()); |
| WebNode meta_node = head_ele.firstChild(); |
| ASSERT_TRUE(!meta_node.isNull()); |
| // Get meta charset info. |
| std::string charset_info2; |
| ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); |
| ASSERT_TRUE(!charset_info2.empty()); |
| ASSERT_TRUE(charset_info2 == std::string(web_frame->encoding().utf8())); |
| |
| // Make sure no more additional META tags which have charset declaration. |
| for (WebNode child = meta_node.nextSibling(); !child.isNull(); |
| child = child.nextSibling()) { |
| std::string charset_info; |
| if (IsMetaElement(child, charset_info)) |
| ASSERT_TRUE(charset_info.empty()); |
| } |
| } |
| |
| // Test situation of html entities in text when serializing HTML DOM. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII( |
| "dom_serializer/htmlentities_in_text.htm"); |
| // Get file URL. The URL is dummy URL to identify the following loading |
| // actions. The test content is in constant:original_contents. |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Test contents. |
| static const char* const original_contents = |
| "<html><body>&<>\"\'</body></html>"; |
| // Load the test contents. |
| LoadContents(original_contents, file_url, WebString()); |
| |
| // Get BODY's text content in DOM. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); |
| ASSERT_TRUE(web_frame != NULL); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| WebElement body_ele = doc.body(); |
| ASSERT_TRUE(!body_ele.isNull()); |
| WebNode text_node = body_ele.firstChild(); |
| ASSERT_TRUE(text_node.isTextNode()); |
| ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) == |
| "&<>\"\'"); |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| // Compare the serialized contents with original contents. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| // Compare the serialized contents with original contents to make sure |
| // they are same. |
| // Because we add MOTW when serializing DOM, so before comparison, we also |
| // need to add MOTW to original_contents. |
| std::string original_str = |
| WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
| original_str += original_contents; |
| // Since WebCore now inserts a new HEAD element if there is no HEAD element |
| // when creating BODY element. (Please see HTMLParser::bodyCreateErrorCheck.) |
| // We need to append the HEAD content and corresponding META content if we |
| // find WebCore-generated HEAD element. |
| if (!doc.head().isNull()) { |
| WebString encoding = web_frame->encoding(); |
| std::string htmlTag("<html>"); |
| std::string::size_type pos = original_str.find(htmlTag); |
| ASSERT_NE(std::string::npos, pos); |
| pos += htmlTag.length(); |
| std::string head_part("<head>"); |
| head_part += |
| WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); |
| head_part += "</head>"; |
| original_str.insert(pos, head_part); |
| } |
| ASSERT_EQ(original_str, serialized_contents); |
| } |
| |
| // Test situation of html entities in attribute value when serializing |
| // HTML DOM. |
| // This test started to fail at WebKit r65388. See http://crbug.com/52279. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInAttributeValue) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII( |
| "dom_serializer/htmlentities_in_attribute_value.htm"); |
| // Get file URL. The URL is dummy URL to identify the following loading |
| // actions. The test content is in constant:original_contents. |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Test contents. |
| static const char* const original_contents = |
| "<html><body title=\"&<>"'\"></body></html>"; |
| // Load the test contents. |
| LoadContents(original_contents, file_url, WebString()); |
| // Get value of BODY's title attribute in DOM. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); |
| ASSERT_TRUE(web_frame != NULL); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| WebElement body_ele = doc.body(); |
| ASSERT_TRUE(!body_ele.isNull()); |
| WebString value = body_ele.getAttribute("title"); |
| ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'"); |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| // Compare the serialized contents with original contents. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| // Compare the serialized contents with original contents to make sure |
| // they are same. |
| std::string original_str = |
| WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
| original_str += original_contents; |
| if (!doc.isNull()) { |
| WebString encoding = web_frame->encoding(); |
| std::string htmlTag("<html>"); |
| std::string::size_type pos = original_str.find(htmlTag); |
| ASSERT_NE(std::string::npos, pos); |
| pos += htmlTag.length(); |
| std::string head_part("<head>"); |
| head_part += |
| WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); |
| head_part += "</head>"; |
| original_str.insert(pos, head_part); |
| } |
| ASSERT_EQ(original_str, serialized_contents); |
| } |
| |
| // Test situation of non-standard HTML entities when serializing HTML DOM. |
| // This test started to fail at WebKit r65351. See http://crbug.com/52279. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithNonStandardEntities) { |
| // Make a test file URL and load it. |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("nonstandard_htmlentities.htm"); |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| LoadPageFromURL(file_url); |
| |
| // Get value of BODY's title attribute in DOM. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| WebElement body_element = doc.body(); |
| // Unescaped string for "%⊅¹'". |
| static const wchar_t parsed_value[] = { |
| '%', 0x2285, 0x00b9, '\'', 0 |
| }; |
| WebString value = body_element.getAttribute("title"); |
| ASSERT_TRUE(UTF16ToWide(value) == parsed_value); |
| ASSERT_TRUE(UTF16ToWide(body_element.innerText()) == parsed_value); |
| |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| // Check the serialized string. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| // Confirm that the serialized string has no non-standard HTML entities. |
| ASSERT_EQ(std::string::npos, serialized_contents.find("%")); |
| ASSERT_EQ(std::string::npos, serialized_contents.find("⊅")); |
| ASSERT_EQ(std::string::npos, serialized_contents.find("¹")); |
| ASSERT_EQ(std::string::npos, serialized_contents.find("'")); |
| } |
| |
| // Test situation of BASE tag in original document when serializing HTML DOM. |
| // When serializing, we should comment the BASE tag, append a new BASE tag. |
| // rewrite all the savable URLs to relative local path, and change other URLs |
| // to absolute URLs. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithBaseTag) { |
| // There are total 2 available base tags in this test file. |
| const int kTotalBaseTagCountInTestFile = 2; |
| |
| FilePath page_file_path = data_dir_.AppendASCII("dom_serializer"); |
| file_util::EnsureEndsWithSeparator(&page_file_path); |
| |
| // Get page dir URL which is base URL of this file. |
| GURL path_dir_url = net::FilePathToFileURL(page_file_path); |
| // Get file path. |
| page_file_path = |
| page_file_path.AppendASCII("html_doc_has_base_tag.htm"); |
| // Get file URL. |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Load the test file. |
| LoadPageFromURL(file_url); |
| // Since for this test, we assume there is no savable sub-resource links for |
| // this test file, also all links are relative URLs in this test file, so we |
| // need to check those relative URLs and make sure document has BASE tag. |
| WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); |
| ASSERT_TRUE(web_frame != NULL); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| // Go through all descent nodes. |
| WebNodeCollection all = doc.all(); |
| int original_base_tag_count = 0; |
| for (WebNode node = all.firstItem(); !node.isNull(); |
| node = all.nextItem()) { |
| if (!node.isElementNode()) |
| continue; |
| WebElement element = node.to<WebElement>(); |
| if (element.hasTagName("base")) { |
| original_base_tag_count++; |
| } else { |
| // Get link. |
| WebString value = |
| webkit_glue::GetSubResourceLinkFromElement(element); |
| if (value.isNull() && element.hasTagName("a")) { |
| value = element.getAttribute("href"); |
| if (value.isEmpty()) |
| value = WebString(); |
| } |
| // Each link is relative link. |
| if (!value.isNull()) { |
| GURL link(value.utf8()); |
| ASSERT_TRUE(link.scheme().empty()); |
| } |
| } |
| } |
| ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); |
| // Make sure in original document, the base URL is not equal with the |
| // |path_dir_url|. |
| GURL original_base_url(doc.baseURL()); |
| ASSERT_NE(original_base_url, path_dir_url); |
| |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| |
| // Load the serialized contents. |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| LoadContents(serialized_contents, file_url, |
| web_frame->encoding()); |
| |
| // Make sure all links are absolute URLs and doc there are some number of |
| // BASE tags in serialized HTML data. Each of those BASE tags have same base |
| // URL which is as same as URL of current test file. |
| web_frame = test_shell_->webView()->mainFrame(); |
| ASSERT_TRUE(web_frame != NULL); |
| doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| // Go through all descent nodes. |
| all = doc.all(); |
| int new_base_tag_count = 0; |
| for (WebNode node = all.firstItem(); !node.isNull(); |
| node = all.nextItem()) { |
| if (!node.isElementNode()) |
| continue; |
| WebElement element = node.to<WebElement>(); |
| if (element.hasTagName("base")) { |
| new_base_tag_count++; |
| } else { |
| // Get link. |
| WebString value = |
| webkit_glue::GetSubResourceLinkFromElement(element); |
| if (value.isNull() && element.hasTagName("a")) { |
| value = element.getAttribute("href"); |
| if (value.isEmpty()) |
| value = WebString(); |
| } |
| // Each link is absolute link. |
| if (!value.isNull()) { |
| GURL link(std::string(value.utf8())); |
| ASSERT_FALSE(link.scheme().empty()); |
| } |
| } |
| } |
| // We have one more added BASE tag which is generated by JavaScript. |
| ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); |
| // Make sure in new document, the base URL is equal with the |path_dir_url|. |
| GURL new_base_url(doc.baseURL()); |
| ASSERT_EQ(new_base_url, path_dir_url); |
| } |
| |
| // Serializing page which has an empty HEAD tag. |
| TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("empty_head.htm"); |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| |
| // Load the test html content. |
| static const char* const empty_head_contents = |
| "<html><head></head><body>hello world</body></html>"; |
| LoadContents(empty_head_contents, file_url, WebString()); |
| |
| // Make sure the head tag is empty. |
| WebFrame* web_frame = test_shell_->webView()->mainFrame(); |
| ASSERT_TRUE(web_frame != NULL); |
| WebDocument doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| WebElement head_element = doc.head(); |
| ASSERT_TRUE(!head_element.isNull()); |
| ASSERT_TRUE(!head_element.hasChildNodes()); |
| ASSERT_TRUE(head_element.childNodes().length() == 0); |
| |
| // Do serialization. |
| SerializeDomForURL(file_url, false); |
| // Make sure the serialized contents have META ; |
| ASSERT_TRUE(HasSerializedFrame(file_url)); |
| const std::string& serialized_contents = |
| GetSerializedContentForFrame(file_url); |
| |
| // Reload serialized contents and make sure there is only one META tag. |
| LoadContents(serialized_contents, file_url, web_frame->encoding()); |
| web_frame = test_shell_->webView()->mainFrame(); |
| ASSERT_TRUE(web_frame != NULL); |
| doc = web_frame->document(); |
| ASSERT_TRUE(doc.isHTMLDocument()); |
| head_element = doc.head(); |
| ASSERT_TRUE(!head_element.isNull()); |
| ASSERT_TRUE(head_element.hasChildNodes()); |
| ASSERT_TRUE(head_element.childNodes().length() == 1); |
| WebNode meta_node = head_element.firstChild(); |
| ASSERT_TRUE(!meta_node.isNull()); |
| // Get meta charset info. |
| std::string charset_info; |
| ASSERT_TRUE(IsMetaElement(meta_node, charset_info)); |
| ASSERT_TRUE(!charset_info.empty()); |
| ASSERT_TRUE(charset_info == std::string(web_frame->encoding().utf8())); |
| |
| // Check the body's first node is text node and its contents are |
| // "hello world" |
| WebElement body_element = doc.body(); |
| ASSERT_TRUE(!body_element.isNull()); |
| WebNode text_node = body_element.firstChild(); |
| ASSERT_TRUE(text_node.isTextNode()); |
| WebString text_node_contents = text_node.nodeValue(); |
| ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world"); |
| } |
| |
| // Test that we don't crash when the page contains an iframe that |
| // was handled as a download (http://crbug.com/42212). |
| TEST_F(DomSerializerTests, SerializeDocumentWithDownloadedIFrame) { |
| FilePath page_file_path = data_dir_; |
| page_file_path = page_file_path.AppendASCII("dom_serializer"); |
| page_file_path = page_file_path.AppendASCII("iframe-src-is-exe.htm"); |
| GURL file_url = net::FilePathToFileURL(page_file_path); |
| ASSERT_TRUE(file_url.SchemeIsFile()); |
| // Load the test file. |
| LoadPageFromURL(file_url); |
| // Do a recursive serialization. We pass if we don't crash. |
| SerializeDomForURL(file_url, true); |
| } |
| |
| } // namespace |