| // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/common/extensions/url_pattern.h" |
| |
| #include "base/string_piece.h" |
| #include "base/string_split.h" |
| #include "base/string_util.h" |
| #include "chrome/common/url_constants.h" |
| #include "googleurl/src/gurl.h" |
| #include "googleurl/src/url_util.h" |
| |
| const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; |
| |
| namespace { |
| |
| // TODO(aa): Consider adding chrome-extension? What about more obscure ones |
| // like data: and javascript: ? |
| // Note: keep this array in sync with kValidSchemeMasks. |
| const char* kValidSchemes[] = { |
| chrome::kHttpScheme, |
| chrome::kHttpsScheme, |
| chrome::kFileScheme, |
| chrome::kFtpScheme, |
| chrome::kChromeUIScheme, |
| chrome::kFileSystemScheme, |
| }; |
| |
| const int kValidSchemeMasks[] = { |
| URLPattern::SCHEME_HTTP, |
| URLPattern::SCHEME_HTTPS, |
| URLPattern::SCHEME_FILE, |
| URLPattern::SCHEME_FTP, |
| URLPattern::SCHEME_CHROMEUI, |
| URLPattern::SCHEME_FILESYSTEM, |
| }; |
| |
| COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), |
| must_keep_these_arrays_in_sync); |
| |
| const char* kParseSuccess = "Success."; |
| const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator."; |
| const char* kParseErrorInvalidScheme = "Invalid scheme."; |
| const char* kParseErrorWrongSchemeType = "Wrong scheme type."; |
| const char* kParseErrorEmptyHost = "Host can not be empty."; |
| const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard."; |
| const char* kParseErrorEmptyPath = "Empty path."; |
| const char* kParseErrorHasColon = |
| "Ports are not supported in URL patterns. ':' may not be used in a host."; |
| |
| // Message explaining each URLPattern::ParseResult. |
| const char* kParseResultMessages[] = { |
| kParseSuccess, |
| kParseErrorMissingSchemeSeparator, |
| kParseErrorInvalidScheme, |
| kParseErrorWrongSchemeType, |
| kParseErrorEmptyHost, |
| kParseErrorInvalidHostWildcard, |
| kParseErrorEmptyPath, |
| kParseErrorHasColon |
| }; |
| |
| COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), |
| must_add_message_for_each_parse_result); |
| |
// Separator that Parse() searches for to find where the host ends and the
// path begins.
const char kPathSeparator[] = "/";
| |
| bool IsStandardScheme(const std::string& scheme) { |
| // "*" gets the same treatment as a standard scheme. |
| if (scheme == "*") |
| return true; |
| |
| return url_util::IsStandard(scheme.c_str(), |
| url_parse::Component(0, static_cast<int>(scheme.length()))); |
| } |
| |
| } // namespace |
| |
| URLPattern::URLPattern() |
| : valid_schemes_(SCHEME_NONE), |
| match_all_urls_(false), |
| match_subdomains_(false) {} |
| |
| URLPattern::URLPattern(int valid_schemes) |
| : valid_schemes_(valid_schemes), match_all_urls_(false), |
| match_subdomains_(false) {} |
| |
| URLPattern::URLPattern(int valid_schemes, const std::string& pattern) |
| : valid_schemes_(valid_schemes), match_all_urls_(false), |
| match_subdomains_(false) { |
| |
| // Strict error checking is used, because this constructor is only |
| // appropriate when we know |pattern| is valid. |
| if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT)) |
| NOTREACHED() << "URLPattern is invalid: " << pattern; |
| } |
| |
| URLPattern::~URLPattern() { |
| } |
| |
| URLPattern::ParseResult URLPattern::Parse(const std::string& pattern, |
| ParseOption strictness) { |
| CHECK(strictness == PARSE_LENIENT || |
| strictness == PARSE_STRICT); |
| |
| // Special case pattern to match every valid URL. |
| if (pattern == kAllUrlsPattern) { |
| match_all_urls_ = true; |
| match_subdomains_ = true; |
| scheme_ = "*"; |
| host_.clear(); |
| SetPath("/*"); |
| return PARSE_SUCCESS; |
| } |
| |
| // Parse out the scheme. |
| size_t scheme_end_pos = pattern.find(chrome::kStandardSchemeSeparator); |
| bool has_standard_scheme_separator = true; |
| |
| // Some urls also use ':' alone as the scheme separator. |
| if (scheme_end_pos == std::string::npos) { |
| scheme_end_pos = pattern.find(':'); |
| has_standard_scheme_separator = false; |
| } |
| |
| if (scheme_end_pos == std::string::npos) |
| return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; |
| |
| if (!SetScheme(pattern.substr(0, scheme_end_pos))) |
| return PARSE_ERROR_INVALID_SCHEME; |
| |
| bool standard_scheme = IsStandardScheme(scheme_); |
| if (standard_scheme != has_standard_scheme_separator) |
| return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; |
| |
| // Advance past the scheme separator. |
| scheme_end_pos += |
| (standard_scheme ? strlen(chrome::kStandardSchemeSeparator) : 1); |
| if (scheme_end_pos >= pattern.size()) |
| return PARSE_ERROR_EMPTY_HOST; |
| |
| // Parse out the host and path. |
| size_t host_start_pos = scheme_end_pos; |
| size_t path_start_pos = 0; |
| |
| // File URLs are special because they have no host. |
| if (scheme_ == chrome::kFileScheme || !standard_scheme) { |
| path_start_pos = host_start_pos; |
| } else { |
| size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); |
| |
| // Host is required. |
| if (host_start_pos == host_end_pos) |
| return PARSE_ERROR_EMPTY_HOST; |
| |
| if (host_end_pos == std::string::npos) |
| return PARSE_ERROR_EMPTY_PATH; |
| |
| host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); |
| |
| // The first component can optionally be '*' to match all subdomains. |
| std::vector<std::string> host_components; |
| base::SplitString(host_, '.', &host_components); |
| if (host_components[0] == "*") { |
| match_subdomains_ = true; |
| host_components.erase(host_components.begin(), |
| host_components.begin() + 1); |
| } |
| host_ = JoinString(host_components, '.'); |
| |
| // No other '*' can occur in the host, though. This isn't necessary, but is |
| // done as a convenience to developers who might otherwise be confused and |
| // think '*' works as a glob in the host. |
| if (host_.find('*') != std::string::npos) |
| return PARSE_ERROR_INVALID_HOST_WILDCARD; |
| |
| path_start_pos = host_end_pos; |
| } |
| |
| SetPath(pattern.substr(path_start_pos)); |
| |
| if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos) |
| return PARSE_ERROR_HAS_COLON; |
| |
| return PARSE_SUCCESS; |
| } |
| |
| bool URLPattern::SetScheme(const std::string& scheme) { |
| scheme_ = scheme; |
| if (scheme_ == "*") { |
| valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); |
| } else if (!IsValidScheme(scheme_)) { |
| return false; |
| } |
| return true; |
| } |
| |
| bool URLPattern::IsValidScheme(const std::string& scheme) const { |
| if (valid_schemes_ == SCHEME_ALL) |
| return true; |
| |
| for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { |
| if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| void URLPattern::SetPath(const std::string& path) { |
| path_ = path; |
| path_escaped_ = path_; |
| ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); |
| ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); |
| } |
| |
| bool URLPattern::MatchesUrl(const GURL &test) const { |
| if (!MatchesScheme(test.scheme())) |
| return false; |
| |
| if (match_all_urls_) |
| return true; |
| |
| if (!MatchesHost(test)) |
| return false; |
| |
| if (!MatchesPath(test.PathForRequest())) |
| return false; |
| |
| return true; |
| } |
| |
| bool URLPattern::MatchesScheme(const std::string& test) const { |
| if (!IsValidScheme(test)) |
| return false; |
| |
| return scheme_ == "*" || test == scheme_; |
| } |
| |
| bool URLPattern::MatchesHost(const std::string& host) const { |
| std::string test(chrome::kHttpScheme); |
| test += chrome::kStandardSchemeSeparator; |
| test += host; |
| test += "/"; |
| return MatchesHost(GURL(test)); |
| } |
| |
| bool URLPattern::MatchesHost(const GURL& test) const { |
| // If the hosts are exactly equal, we have a match. |
| if (test.host() == host_) |
| return true; |
| |
| // If we're matching subdomains, and we have no host in the match pattern, |
| // that means that we're matching all hosts, which means we have a match no |
| // matter what the test host is. |
| if (match_subdomains_ && host_.empty()) |
| return true; |
| |
| // Otherwise, we can only match if our match pattern matches subdomains. |
| if (!match_subdomains_) |
| return false; |
| |
| // We don't do subdomain matching against IP addresses, so we can give up now |
| // if the test host is an IP address. |
| if (test.HostIsIPAddress()) |
| return false; |
| |
| // Check if the test host is a subdomain of our host. |
| if (test.host().length() <= (host_.length() + 1)) |
| return false; |
| |
| if (test.host().compare(test.host().length() - host_.length(), |
| host_.length(), host_) != 0) |
| return false; |
| |
| return test.host()[test.host().length() - host_.length() - 1] == '.'; |
| } |
| |
| bool URLPattern::MatchesPath(const std::string& test) const { |
| if (!MatchPattern(test, path_escaped_)) |
| return false; |
| |
| return true; |
| } |
| |
| std::string URLPattern::GetAsString() const { |
| if (match_all_urls_) |
| return kAllUrlsPattern; |
| |
| bool standard_scheme = IsStandardScheme(scheme_); |
| |
| std::string spec = scheme_ + |
| (standard_scheme ? chrome::kStandardSchemeSeparator : ":"); |
| |
| if (scheme_ != chrome::kFileScheme && standard_scheme) { |
| if (match_subdomains_) { |
| spec += "*"; |
| if (!host_.empty()) |
| spec += "."; |
| } |
| |
| if (!host_.empty()) |
| spec += host_; |
| } |
| |
| if (!path_.empty()) |
| spec += path_; |
| |
| return spec; |
| } |
| |
| bool URLPattern::OverlapsWith(const URLPattern& other) const { |
| if (!MatchesScheme(other.scheme_) && !other.MatchesScheme(scheme_)) |
| return false; |
| |
| if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) |
| return false; |
| |
| // We currently only use OverlapsWith() for the patterns inside |
| // ExtensionExtent. In those cases, we know that the path will have only a |
| // single wildcard at the end. This makes figuring out overlap much easier. It |
| // seems like there is probably a computer-sciency way to solve the general |
| // case, but we don't need that yet. |
| DCHECK(path_.find('*') == path_.size() - 1); |
| DCHECK(other.path().find('*') == other.path().size() - 1); |
| |
| if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && |
| !other.MatchesPath(path_.substr(0, path_.size() - 1))) |
| return false; |
| |
| return true; |
| } |
| |
| std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { |
| std::vector<URLPattern> result; |
| |
| if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { |
| result.push_back(*this); |
| return result; |
| } |
| |
| for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { |
| if (MatchesScheme(kValidSchemes[i])) { |
| URLPattern temp = *this; |
| temp.SetScheme(kValidSchemes[i]); |
| temp.set_match_all_urls(false); |
| result.push_back(temp); |
| } |
| } |
| |
| return result; |
| } |
| |
| // static |
| const char* URLPattern::GetParseResultString( |
| URLPattern::ParseResult parse_result) { |
| return kParseResultMessages[parse_result]; |
| } |