| // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <algorithm> |
| #include <string> |
| |
| #include "net/base/escape.h" |
| |
| #include "base/basictypes.h" |
| #include "base/i18n/icu_string_conversions.h" |
| #include "base/string_util.h" |
| #include "base/stringprintf.h" |
| #include "base/utf_string_conversions.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| |
| namespace { |
| |
| static const size_t kNpos = string16::npos; |
| |
| struct EscapeCase { |
| const wchar_t* input; |
| const wchar_t* output; |
| }; |
| |
| struct UnescapeURLCase { |
| const wchar_t* input; |
| UnescapeRule::Type rules; |
| const wchar_t* output; |
| }; |
| |
| struct UnescapeURLCaseASCII { |
| const char* input; |
| UnescapeRule::Type rules; |
| const char* output; |
| }; |
| |
| struct UnescapeAndDecodeCase { |
| const char* input; |
| |
| // The expected output when run through UnescapeURL. |
| const char* url_unescaped; |
| |
| // The expected output when run through UnescapeQuery. |
| const char* query_unescaped; |
| |
| // The expected output when run through UnescapeAndDecodeURLComponent. |
| const wchar_t* decoded; |
| }; |
| |
| struct AdjustOffsetCase { |
| const char* input; |
| size_t input_offset; |
| size_t output_offset; |
| }; |
| |
| struct EscapeForHTMLCase { |
| const char* input; |
| const char* expected_output; |
| }; |
| |
| } // namespace |
| |
| TEST(EscapeTest, EscapeTextForFormSubmission) { |
| const EscapeCase escape_cases[] = { |
| {L"foo", L"foo"}, |
| {L"foo bar", L"foo+bar"}, |
| {L"foo++", L"foo%2B%2B"} |
| }; |
| for (size_t i = 0; i < arraysize(escape_cases); ++i) { |
| EscapeCase value = escape_cases[i]; |
| EXPECT_EQ(WideToUTF16Hack(value.output), |
| EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), true)); |
| } |
| |
| const EscapeCase escape_cases_no_plus[] = { |
| {L"foo", L"foo"}, |
| {L"foo bar", L"foo%20bar"}, |
| {L"foo++", L"foo%2B%2B"} |
| }; |
| for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { |
| EscapeCase value = escape_cases_no_plus[i]; |
| EXPECT_EQ(WideToUTF16Hack(value.output), |
| EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), false)); |
| } |
| |
| // Test all the values in we're supposed to be escaping. |
| const std::string no_escape( |
| "abcdefghijklmnopqrstuvwxyz" |
| "ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "0123456789" |
| "!'()*-._~"); |
| for (int i = 0; i < 256; ++i) { |
| std::string in; |
| in.push_back(i); |
| std::string out = EscapeQueryParamValue(in, true); |
| if (0 == i) { |
| EXPECT_EQ(out, std::string("%00")); |
| } else if (32 == i) { |
| // Spaces are plus escaped like web forms. |
| EXPECT_EQ(out, std::string("+")); |
| } else if (no_escape.find(in) == std::string::npos) { |
| // Check %hex escaping |
| std::string expected = base::StringPrintf("%%%02X", i); |
| EXPECT_EQ(expected, out); |
| } else { |
| // No change for things in the no_escape list. |
| EXPECT_EQ(out, in); |
| } |
| } |
| |
| // Check to see if EscapeQueryParamValueUTF8 is the same as |
| // EscapeQueryParamValue(..., kCodepageUTF8,) |
| string16 test_str; |
| test_str.reserve(5000); |
| for (int i = 1; i < 5000; ++i) { |
| test_str.push_back(i); |
| } |
| string16 wide; |
| EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true, |
| &wide)); |
| EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, true)); |
| EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false, |
| &wide)); |
| EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, false)); |
| } |
| |
| TEST(EscapeTest, EscapePath) { |
| ASSERT_EQ( |
| // Most of the character space we care about, un-escaped |
| EscapePath( |
| "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" |
| "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "[\\]^_`abcdefghijklmnopqrstuvwxyz" |
| "{|}~\x7f\x80\xff"), |
| // Escaped |
| "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;" |
| "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" |
| "%7B%7C%7D~%7F%80%FF"); |
| } |
| |
| TEST(EscapeTest, EscapeUrlEncodedData) { |
| ASSERT_EQ( |
| // Most of the character space we care about, un-escaped |
| EscapeUrlEncodedData( |
| "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" |
| "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "[\\]^_`abcdefghijklmnopqrstuvwxyz" |
| "{|}~\x7f\x80\xff"), |
| // Escaped |
| "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B" |
| "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
| "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" |
| "%7B%7C%7D~%7F%80%FF"); |
| } |
| |
| TEST(EscapeTest, UnescapeURLComponentASCII) { |
| const UnescapeURLCaseASCII unescape_cases[] = { |
| {"", UnescapeRule::NORMAL, ""}, |
| {"%2", UnescapeRule::NORMAL, "%2"}, |
| {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, |
| {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, |
| {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, |
| {"Some%20random text %25%2dOK", UnescapeRule::NONE, |
| "Some%20random text %25%2dOK"}, |
| {"Some%20random text %25%2dOK", UnescapeRule::NORMAL, |
| "Some%20random text %25-OK"}, |
| {"Some%20random text %25%2dOK", UnescapeRule::SPACES, |
| "Some random text %25-OK"}, |
| {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, |
| "Some%20random text %-OK"}, |
| {"Some%20random text %25%2dOK", |
| UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
| "Some random text %-OK"}, |
| {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"}, |
| {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}, |
| // Certain URL-sensitive characters should not be unescaped unless asked. |
| {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, |
| "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, |
| {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", |
| UnescapeRule::URL_SPECIAL_CHARS, |
| "Hello%20%13%10world ## ?? == && %% ++"}, |
| // Control characters. |
| {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, |
| "%01%02%03%04%05%06%07%08%09 %"}, |
| {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, |
| "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, |
| {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"}, |
| {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"}, |
| }; |
| |
| for (size_t i = 0; i < arraysize(unescape_cases); i++) { |
| std::string str(unescape_cases[i].input); |
| EXPECT_EQ(std::string(unescape_cases[i].output), |
| UnescapeURLComponent(str, unescape_cases[i].rules)); |
| } |
| |
| // Test the NULL character unescaping (which wouldn't work above since those |
| // are just char pointers). |
| std::string input("Null"); |
| input.push_back(0); // Also have a NULL in the input. |
| input.append("%00%39Test"); |
| |
| // When we're unescaping NULLs |
| std::string expected("Null"); |
| expected.push_back(0); |
| expected.push_back(0); |
| expected.append("9Test"); |
| EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); |
| |
| // When we're not unescaping NULLs. |
| expected = "Null"; |
| expected.push_back(0); |
| expected.append("%009Test"); |
| EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); |
| } |
| |
| TEST(EscapeTest, UnescapeURLComponent) { |
| const UnescapeURLCase unescape_cases[] = { |
| {L"", UnescapeRule::NORMAL, L""}, |
| {L"%2", UnescapeRule::NORMAL, L"%2"}, |
| {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"}, |
| {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"}, |
| {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"}, |
| {L"Some%20random text %25%2dOK", UnescapeRule::NONE, |
| L"Some%20random text %25%2dOK"}, |
| {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL, |
| L"Some%20random text %25-OK"}, |
| {L"Some%20random text %25%2dOK", UnescapeRule::SPACES, |
| L"Some random text %25-OK"}, |
| {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, |
| L"Some%20random text %-OK"}, |
| {L"Some%20random text %25%2dOK", |
| UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
| L"Some random text %-OK"}, |
| {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"}, |
| {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"}, |
| // Certain URL-sensitive characters should not be unescaped unless asked. |
| {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, |
| L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, |
| {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", |
| UnescapeRule::URL_SPECIAL_CHARS, |
| L"Hello%20%13%10world ## ?? == && %% ++"}, |
| // We can neither escape nor unescape '@' since some websites expect it to |
| // be preserved as either '@' or "%40". |
| // See http://b/996720 and http://crbug.com/23933 . |
| {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"}, |
| // Control characters. |
| {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, |
| L"%01%02%03%04%05%06%07%08%09 %"}, |
| {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, |
| L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, |
| {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"}, |
| {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, |
| L"Hello%20\x13\x10\x02"}, |
| {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS, |
| L"Hello\x9824\x9827"}, |
| }; |
| |
| for (size_t i = 0; i < arraysize(unescape_cases); i++) { |
| string16 str(WideToUTF16(unescape_cases[i].input)); |
| EXPECT_EQ(WideToUTF16(unescape_cases[i].output), |
| UnescapeURLComponent(str, unescape_cases[i].rules)); |
| } |
| |
| // Test the NULL character unescaping (which wouldn't work above since those |
| // are just char pointers). |
| string16 input(WideToUTF16(L"Null")); |
| input.push_back(0); // Also have a NULL in the input. |
| input.append(WideToUTF16(L"%00%39Test")); |
| |
| // When we're unescaping NULLs |
| string16 expected(WideToUTF16(L"Null")); |
| expected.push_back(0); |
| expected.push_back(0); |
| expected.append(ASCIIToUTF16("9Test")); |
| EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); |
| |
| // When we're not unescaping NULLs. |
| expected = WideToUTF16(L"Null"); |
| expected.push_back(0); |
| expected.append(WideToUTF16(L"%009Test")); |
| EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); |
| } |
| |
| TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { |
| const UnescapeAndDecodeCase unescape_cases[] = { |
| { "%", |
| "%", |
| "%", |
| L"%"}, |
| { "+", |
| "+", |
| " ", |
| L"+"}, |
| { "%2+", |
| "%2+", |
| "%2 ", |
| L"%2+"}, |
| { "+%%%+%%%", |
| "+%%%+%%%", |
| " %%% %%%", |
| L"+%%%+%%%"}, |
| { "Don't escape anything", |
| "Don't escape anything", |
| "Don't escape anything", |
| L"Don't escape anything"}, |
| { "+Invalid %escape %2+", |
| "+Invalid %escape %2+", |
| " Invalid %escape %2 ", |
| L"+Invalid %escape %2+"}, |
| { "Some random text %25%2dOK", |
| "Some random text %25-OK", |
| "Some random text %25-OK", |
| L"Some random text %25-OK"}, |
| { "%01%02%03%04%05%06%07%08%09", |
| "%01%02%03%04%05%06%07%08%09", |
| "%01%02%03%04%05%06%07%08%09", |
| L"%01%02%03%04%05%06%07%08%09"}, |
| { "%E4%BD%A0+%E5%A5%BD", |
| "\xE4\xBD\xA0+\xE5\xA5\xBD", |
| "\xE4\xBD\xA0 \xE5\xA5\xBD", |
| L"\x4f60+\x597d"}, |
| { "%ED%ED", // Invalid UTF-8. |
| "\xED\xED", |
| "\xED\xED", |
| L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. |
| }; |
| |
| for (size_t i = 0; i < arraysize(unescape_cases); i++) { |
| std::string unescaped = UnescapeURLComponent(unescape_cases[i].input, |
| UnescapeRule::NORMAL); |
| EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped); |
| |
| unescaped = UnescapeURLComponent(unescape_cases[i].input, |
| UnescapeRule::REPLACE_PLUS_WITH_SPACE); |
| EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); |
| |
| // TODO: Need to test unescape_spaces and unescape_percent. |
| string16 decoded = UnescapeAndDecodeUTF8URLComponent( |
| unescape_cases[i].input, UnescapeRule::NORMAL, NULL); |
| EXPECT_EQ(WideToUTF16Hack(std::wstring(unescape_cases[i].decoded)), |
| decoded); |
| } |
| } |
| |
| TEST(EscapeTest, AdjustOffset) { |
| const AdjustOffsetCase adjust_cases[] = { |
| {"", 0, std::wstring::npos}, |
| {"test", 0, 0}, |
| {"test", 2, 2}, |
| {"test", 4, std::wstring::npos}, |
| {"test", std::wstring::npos, std::wstring::npos}, |
| {"%2dtest", 6, 4}, |
| {"%2dtest", 2, std::wstring::npos}, |
| {"test%2d", 2, 2}, |
| {"%E4%BD%A0+%E5%A5%BD", 9, 1}, |
| {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos}, |
| {"%ED%B0%80+%E5%A5%BD", 6, 6}, |
| }; |
| |
| for (size_t i = 0; i < arraysize(adjust_cases); i++) { |
| size_t offset = adjust_cases[i].input_offset; |
| UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input, |
| UnescapeRule::NORMAL, &offset); |
| EXPECT_EQ(adjust_cases[i].output_offset, offset); |
| } |
| } |
| |
| TEST(EscapeTest, EscapeForHTML) { |
| const EscapeForHTMLCase tests[] = { |
| { "hello", "hello" }, |
| { "<hello>", "<hello>" }, |
| { "don\'t mess with me", "don't mess with me" }, |
| }; |
| for (size_t i = 0; i < arraysize(tests); ++i) { |
| std::string result = EscapeForHTML(std::string(tests[i].input)); |
| EXPECT_EQ(std::string(tests[i].expected_output), result); |
| } |
| } |
| |
| TEST(EscapeTest, UnescapeForHTML) { |
| const EscapeForHTMLCase tests[] = { |
| { "", "" }, |
| { "<hello>", "<hello>" }, |
| { "don't mess with me", "don\'t mess with me" }, |
| { "<>&"'", "<>&\"'" }, |
| { "& lt; & ; &; '", "& lt; & ; &; '" }, |
| { "&", "&" }, |
| { """, "\"" }, |
| { "'", "'" }, |
| { "<", "<" }, |
| { ">", ">" }, |
| { "& &", "& &" }, |
| }; |
| for (size_t i = 0; i < arraysize(tests); ++i) { |
| string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input)); |
| EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result); |
| } |
| } |
| |
| TEST(EscapeTest, AdjustEncodingOffset) { |
| // Imagine we have strings as shown in the following cases where the |
| // %XX's represent encoded characters |
| |
| // 1: abc%ECdef ==> abcXdef |
| std::vector<size_t> offsets; |
| for (size_t t = 0; t < 9; ++t) |
| offsets.push_back(t); |
| AdjustEncodingOffset::Adjustments adjustments; |
| adjustments.push_back(3); |
| std::for_each(offsets.begin(), offsets.end(), |
| AdjustEncodingOffset(adjustments)); |
| size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6}; |
| EXPECT_EQ(offsets.size(), arraysize(expected_1)); |
| for (size_t i = 0; i < arraysize(expected_1); ++i) |
| EXPECT_EQ(expected_1[i], offsets[i]); |
| |
| |
| // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX |
| offsets.clear(); |
| for (size_t t = 0; t < 18; ++t) |
| offsets.push_back(t); |
| adjustments.clear(); |
| adjustments.push_back(0); |
| adjustments.push_back(6); |
| adjustments.push_back(9); |
| adjustments.push_back(15); |
| std::for_each(offsets.begin(), offsets.end(), |
| AdjustEncodingOffset(adjustments)); |
| size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos, |
| kNpos, 6, 7, 8, 9, kNpos, kNpos}; |
| EXPECT_EQ(offsets.size(), arraysize(expected_2)); |
| for (size_t i = 0; i < arraysize(expected_2); ++i) |
| EXPECT_EQ(expected_2[i], offsets[i]); |
| } |