blob: 2a9d74cb778a82c93ee55c0c36eaa442bac875c7 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/search_engines/template_url.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/i18n/rtl.h"
#include "base/logging.h"
#include "base/string_number_conversions.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/metrics/user_metrics.h"
#include "chrome/browser/search_engines/search_engine_type.h"
#include "chrome/browser/search_engines/search_terms_data.h"
#include "chrome/browser/search_engines/template_url_model.h"
#include "chrome/common/url_constants.h"
#include "chrome/installer/util/google_update_settings.h"
#include "net/base/escape.h"
#include "ui/base/l10n/l10n_util.h"
#include "ui/gfx/favicon_size.h"
// TODO(pastarmovj): Remove google_update_settings and user_metrics when the
// CollectRLZMetrics function is not needed anymore.
// The TemplateURLRef has any number of terms that need to be replaced. Each of
// the terms is enclosed in braces. If the character preceeding the final
// brace is a ?, it indicates the term is optional and can be replaced with
// an empty string.
static const char kStartParameter = '{';
static const char kEndParameter = '}';
static const char kOptional = '?';
// Known parameters found in the URL.
static const char kSearchTermsParameter[] = "searchTerms";
static const char kSearchTermsParameterFull[] = "{searchTerms}";
static const char kCountParameter[] = "count";
static const char kStartIndexParameter[] = "startIndex";
static const char kStartPageParameter[] = "startPage";
static const char kLanguageParameter[] = "language";
static const char kInputEncodingParameter[] = "inputEncoding";
static const char kOutputEncodingParameter[] = "outputEncoding";
static const char kGoogleAcceptedSuggestionParameter[] =
"google:acceptedSuggestion";
// Host/Domain Google searches are relative to.
static const char kGoogleBaseURLParameter[] = "google:baseURL";
static const char kGoogleBaseURLParameterFull[] = "{google:baseURL}";
// Like google:baseURL, but for the Search Suggest capability.
static const char kGoogleBaseSuggestURLParameter[] =
"google:baseSuggestURL";
static const char kGoogleBaseSuggestURLParameterFull[] =
"{google:baseSuggestURL}";
static const char kGoogleOriginalQueryForSuggestionParameter[] =
"google:originalQueryForSuggestion";
static const char kGoogleRLZParameter[] = "google:RLZ";
// Same as kSearchTermsParameter, with no escaping.
static const char kGoogleUnescapedSearchTermsParameter[] =
"google:unescapedSearchTerms";
static const char kGoogleUnescapedSearchTermsParameterFull[] =
"{google:unescapedSearchTerms}";
// Display value for kSearchTermsParameter.
static const char kDisplaySearchTerms[] = "%s";
// Display value for kGoogleUnescapedSearchTermsParameter.
static const char kDisplayUnescapedSearchTerms[] = "%S";
// Used if the count parameter is not optional. Indicates we want 10 search
// results.
static const char kDefaultCount[] = "10";
// Used if the parameter kOutputEncodingParameter is required.
static const char kOutputEncodingType[] = "UTF-8";
TemplateURLRef::TemplateURLRef() {
Set(std::string(), 0, 0);
}
TemplateURLRef::TemplateURLRef(const std::string& url,
int index_offset,
int page_offset)
: url_(url),
index_offset_(index_offset),
page_offset_(page_offset),
parsed_(false),
valid_(false),
supports_replacements_(false) {
}
void TemplateURLRef::Set(const std::string& url,
int index_offset,
int page_offset) {
url_ = url;
index_offset_ = index_offset;
page_offset_ = page_offset;
InvalidateCachedValues();
}
TemplateURLRef::~TemplateURLRef() {
}
bool TemplateURLRef::ParseParameter(size_t start,
size_t end,
std::string* url,
Replacements* replacements) const {
DCHECK(start != std::string::npos &&
end != std::string::npos && end > start);
size_t length = end - start - 1;
bool optional = false;
if ((*url)[end - 1] == kOptional) {
optional = true;
length--;
}
std::string parameter(url->substr(start + 1, length));
std::string full_parameter(url->substr(start, end - start + 1));
// Remove the parameter from the string.
url->erase(start, end - start + 1);
if (parameter == kSearchTermsParameter) {
replacements->push_back(Replacement(SEARCH_TERMS, start));
} else if (parameter == kCountParameter) {
if (!optional)
url->insert(start, kDefaultCount);
} else if (parameter == kStartIndexParameter) {
if (!optional) {
url->insert(start, base::IntToString(index_offset_));
}
} else if (parameter == kStartPageParameter) {
if (!optional) {
url->insert(start, base::IntToString(page_offset_));
}
} else if (parameter == kLanguageParameter) {
replacements->push_back(Replacement(LANGUAGE, start));
} else if (parameter == kInputEncodingParameter) {
replacements->push_back(Replacement(ENCODING, start));
} else if (parameter == kOutputEncodingParameter) {
if (!optional)
url->insert(start, kOutputEncodingType);
} else if (parameter == kGoogleAcceptedSuggestionParameter) {
replacements->push_back(Replacement(GOOGLE_ACCEPTED_SUGGESTION, start));
} else if (parameter == kGoogleBaseURLParameter) {
replacements->push_back(Replacement(GOOGLE_BASE_URL, start));
} else if (parameter == kGoogleBaseSuggestURLParameter) {
replacements->push_back(Replacement(GOOGLE_BASE_SUGGEST_URL, start));
} else if (parameter == kGoogleOriginalQueryForSuggestionParameter) {
replacements->push_back(Replacement(GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION,
start));
} else if (parameter == kGoogleRLZParameter) {
replacements->push_back(Replacement(GOOGLE_RLZ, start));
} else if (parameter == kGoogleUnescapedSearchTermsParameter) {
replacements->push_back(Replacement(GOOGLE_UNESCAPED_SEARCH_TERMS, start));
} else {
// It can be some garbage but can also be a javascript block. Put it back.
url->insert(start, full_parameter);
return false;
}
return true;
}
std::string TemplateURLRef::ParseURL(const std::string& url,
Replacements* replacements,
bool* valid) const {
*valid = false;
std::string parsed_url = url;
for (size_t last = 0; last != std::string::npos; ) {
last = parsed_url.find(kStartParameter, last);
if (last != std::string::npos) {
size_t template_end = parsed_url.find(kEndParameter, last);
if (template_end != std::string::npos) {
// Since we allow Javascript in the URL, {} pairs could be nested. Match
// only leaf pairs with supported parameters.
size_t next_template_start = parsed_url.find(kStartParameter, last + 1);
if (next_template_start == std::string::npos ||
next_template_start > template_end) {
// If successful, ParseParameter erases from the string as such no
// need to update |last|. If failed, move |last| to the end of pair.
if (!ParseParameter(last, template_end, &parsed_url, replacements)) {
// |template_end| + 1 may be beyond the end of the string.
last = template_end;
}
} else {
last = next_template_start;
}
} else {
// Open brace without a closing brace, return.
return std::string();
}
}
}
*valid = true;
return parsed_url;
}
void TemplateURLRef::ParseIfNecessary() const {
UIThreadSearchTermsData search_terms_data;
ParseIfNecessaryUsingTermsData(search_terms_data);
}
void TemplateURLRef::ParseIfNecessaryUsingTermsData(
const SearchTermsData& search_terms_data) const {
if (!parsed_) {
parsed_ = true;
parsed_url_ = ParseURL(url_, &replacements_, &valid_);
supports_replacements_ = false;
if (valid_) {
bool has_only_one_search_term = false;
for (Replacements::const_iterator i = replacements_.begin();
i != replacements_.end(); ++i) {
if ((i->type == SEARCH_TERMS) ||
(i->type == GOOGLE_UNESCAPED_SEARCH_TERMS)) {
if (has_only_one_search_term) {
has_only_one_search_term = false;
break;
}
has_only_one_search_term = true;
supports_replacements_ = true;
}
}
// Only parse the host/key if there is one search term. Technically there
// could be more than one term, but it's uncommon; so we punt.
if (has_only_one_search_term)
ParseHostAndSearchTermKey(search_terms_data);
}
}
}
void TemplateURLRef::ParseHostAndSearchTermKey(
const SearchTermsData& search_terms_data) const {
std::string url_string = url_;
ReplaceSubstringsAfterOffset(&url_string, 0,
kGoogleBaseURLParameterFull,
search_terms_data.GoogleBaseURLValue());
ReplaceSubstringsAfterOffset(&url_string, 0,
kGoogleBaseSuggestURLParameterFull,
search_terms_data.GoogleBaseSuggestURLValue());
GURL url(url_string);
if (!url.is_valid())
return;
std::string query_string = url.query();
if (query_string.empty())
return;
url_parse::Component query, key, value;
query.len = static_cast<int>(query_string.size());
while (url_parse::ExtractQueryKeyValue(query_string.c_str(), &query, &key,
&value)) {
if (key.is_nonempty() && value.is_nonempty()) {
std::string value_string = query_string.substr(value.begin, value.len);
if (value_string.find(kSearchTermsParameterFull, 0) !=
std::string::npos ||
value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
std::string::npos) {
search_term_key_ = query_string.substr(key.begin, key.len);
host_ = url.host();
path_ = url.path();
break;
}
}
}
}
// static
void TemplateURLRef::SetGoogleBaseURL(std::string* google_base_url) {
UIThreadSearchTermsData::SetGoogleBaseURL(google_base_url);
}
std::string TemplateURLRef::ReplaceSearchTerms(
const TemplateURL& host,
const string16& terms,
int accepted_suggestion,
const string16& original_query_for_suggestion) const {
UIThreadSearchTermsData search_terms_data;
return ReplaceSearchTermsUsingTermsData(host,
terms,
accepted_suggestion,
original_query_for_suggestion,
search_terms_data);
}
std::string TemplateURLRef::ReplaceSearchTermsUsingTermsData(
const TemplateURL& host,
const string16& terms,
int accepted_suggestion,
const string16& original_query_for_suggestion,
const SearchTermsData& search_terms_data) const {
ParseIfNecessaryUsingTermsData(search_terms_data);
if (!valid_)
return std::string();
if (replacements_.empty())
return parsed_url_;
// Determine if the search terms are in the query or before. We're escaping
// space as '+' in the former case and as '%20' in the latter case.
bool is_in_query = true;
for (Replacements::iterator i = replacements_.begin();
i != replacements_.end(); ++i) {
if (i->type == SEARCH_TERMS) {
string16::size_type query_start = parsed_url_.find('?');
is_in_query = query_start != string16::npos &&
(static_cast<string16::size_type>(i->index) > query_start);
break;
}
}
string16 encoded_terms;
string16 encoded_original_query;
std::string input_encoding;
// If the search terms are in query - escape them respecting the encoding.
if (is_in_query) {
// Encode the search terms so that we know the encoding.
const std::vector<std::string>& encodings = host.input_encodings();
for (size_t i = 0; i < encodings.size(); ++i) {
if (EscapeQueryParamValue(terms,
encodings[i].c_str(), true,
&encoded_terms)) {
if (!original_query_for_suggestion.empty()) {
EscapeQueryParamValue(original_query_for_suggestion,
encodings[i].c_str(),
true,
&encoded_original_query);
}
input_encoding = encodings[i];
break;
}
}
if (input_encoding.empty()) {
encoded_terms = EscapeQueryParamValueUTF8(terms, true);
if (!original_query_for_suggestion.empty()) {
encoded_original_query =
EscapeQueryParamValueUTF8(original_query_for_suggestion, true);
}
input_encoding = "UTF-8";
}
} else {
encoded_terms = UTF8ToUTF16(EscapePath(UTF16ToUTF8(terms)));
input_encoding = "UTF-8";
}
std::string url = parsed_url_;
// replacements_ is ordered in ascending order, as such we need to iterate
// from the back.
for (Replacements::reverse_iterator i = replacements_.rbegin();
i != replacements_.rend(); ++i) {
switch (i->type) {
case ENCODING:
url.insert(i->index, input_encoding);
break;
case GOOGLE_ACCEPTED_SUGGESTION:
if (accepted_suggestion == NO_SUGGESTION_CHOSEN)
url.insert(i->index, "aq=f&");
else if (accepted_suggestion != NO_SUGGESTIONS_AVAILABLE)
url.insert(i->index, StringPrintf("aq=%d&", accepted_suggestion));
break;
case GOOGLE_BASE_URL:
url.insert(i->index, search_terms_data.GoogleBaseURLValue());
break;
case GOOGLE_BASE_SUGGEST_URL:
url.insert(i->index, search_terms_data.GoogleBaseSuggestURLValue());
break;
case GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION:
if (accepted_suggestion >= 0)
url.insert(i->index, "oq=" + UTF16ToUTF8(encoded_original_query) +
"&");
break;
case GOOGLE_RLZ: {
// On platforms that don't have RLZ, we still want this branch
// to happen so that we replace the RLZ template with the
// empty string. (If we don't handle this case, we hit a
// NOTREACHED below.)
#if defined(OS_WIN) && defined(GOOGLE_CHROME_BUILD)
string16 rlz_string = search_terms_data.GetRlzParameterValue();
if (!rlz_string.empty()) {
rlz_string = L"rlz=" + rlz_string + L"&";
url.insert(i->index, UTF16ToUTF8(rlz_string));
}
#endif
break;
}
case GOOGLE_UNESCAPED_SEARCH_TERMS: {
std::string unescaped_terms;
base::UTF16ToCodepage(terms, input_encoding.c_str(),
base::OnStringConversionError::SKIP,
&unescaped_terms);
url.insert(i->index, std::string(unescaped_terms.begin(),
unescaped_terms.end()));
break;
}
case LANGUAGE:
url.insert(i->index, search_terms_data.GetApplicationLocale());
break;
case SEARCH_TERMS:
url.insert(i->index, UTF16ToUTF8(encoded_terms));
break;
default:
NOTREACHED();
break;
}
}
return url;
}
bool TemplateURLRef::SupportsReplacement() const {
UIThreadSearchTermsData search_terms_data;
return SupportsReplacementUsingTermsData(search_terms_data);
}
bool TemplateURLRef::SupportsReplacementUsingTermsData(
const SearchTermsData& search_terms_data) const {
ParseIfNecessaryUsingTermsData(search_terms_data);
return valid_ && supports_replacements_;
}
bool TemplateURLRef::IsValid() const {
UIThreadSearchTermsData search_terms_data;
return IsValidUsingTermsData(search_terms_data);
}
bool TemplateURLRef::IsValidUsingTermsData(
const SearchTermsData& search_terms_data) const {
ParseIfNecessaryUsingTermsData(search_terms_data);
return valid_;
}
string16 TemplateURLRef::DisplayURL() const {
ParseIfNecessary();
if (!valid_ || replacements_.empty())
return UTF8ToUTF16(url_);
string16 result = UTF8ToUTF16(url_);
ReplaceSubstringsAfterOffset(&result, 0,
ASCIIToUTF16(kSearchTermsParameterFull),
ASCIIToUTF16(kDisplaySearchTerms));
ReplaceSubstringsAfterOffset(
&result, 0,
ASCIIToUTF16(kGoogleUnescapedSearchTermsParameterFull),
ASCIIToUTF16(kDisplayUnescapedSearchTerms));
return result;
}
// static
std::string TemplateURLRef::DisplayURLToURLRef(
const string16& display_url) {
string16 result = display_url;
ReplaceSubstringsAfterOffset(&result, 0, ASCIIToUTF16(kDisplaySearchTerms),
ASCIIToUTF16(kSearchTermsParameterFull));
ReplaceSubstringsAfterOffset(
&result, 0,
ASCIIToUTF16(kDisplayUnescapedSearchTerms),
ASCIIToUTF16(kGoogleUnescapedSearchTermsParameterFull));
return UTF16ToUTF8(result);
}
const std::string& TemplateURLRef::GetHost() const {
ParseIfNecessary();
return host_;
}
const std::string& TemplateURLRef::GetPath() const {
ParseIfNecessary();
return path_;
}
const std::string& TemplateURLRef::GetSearchTermKey() const {
ParseIfNecessary();
return search_term_key_;
}
string16 TemplateURLRef::SearchTermToString16(const TemplateURL& host,
const std::string& term) const {
const std::vector<std::string>& encodings = host.input_encodings();
string16 result;
std::string unescaped =
UnescapeURLComponent(term, UnescapeRule::REPLACE_PLUS_WITH_SPACE |
UnescapeRule::URL_SPECIAL_CHARS);
for (size_t i = 0; i < encodings.size(); ++i) {
if (base::CodepageToUTF16(unescaped, encodings[i].c_str(),
base::OnStringConversionError::FAIL, &result))
return result;
}
// Always fall back on UTF-8 if it works.
if (base::CodepageToUTF16(unescaped, base::kCodepageUTF8,
base::OnStringConversionError::FAIL, &result))
return result;
// When nothing worked, just use the escaped text. We have no idea what the
// encoding is. We need to substitute spaces for pluses ourselves since we're
// not sending it through an unescaper.
result = UTF8ToUTF16(term);
std::replace(result.begin(), result.end(), '+', ' ');
return result;
}
bool TemplateURLRef::HasGoogleBaseURLs() const {
ParseIfNecessary();
for (size_t i = 0; i < replacements_.size(); ++i) {
if ((replacements_[i].type == GOOGLE_BASE_URL) ||
(replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
return true;
}
return false;
}
// static
bool TemplateURLRef::SameUrlRefs(const TemplateURLRef* ref1,
const TemplateURLRef* ref2) {
return ref1 == ref2 || (ref1 && ref2 && ref1->url() == ref2->url());
}
void TemplateURLRef::CollectRLZMetrics() const {
#if defined(OS_WIN) && defined(GOOGLE_CHROME_BUILD)
ParseIfNecessary();
for (size_t i = 0; i < replacements_.size(); ++i) {
// We are interesed in searches that were supposed to send the RLZ token.
if (replacements_[i].type == GOOGLE_RLZ) {
string16 brand;
// We only have RLZ tocken on a branded browser version.
if (GoogleUpdateSettings::GetBrand(&brand) && !brand.empty() &&
!GoogleUpdateSettings::IsOrganic(brand)) {
// Now we know we should have had RLZ token check if there was one.
if (url().find("rlz=") != std::string::npos)
UserMetrics::RecordAction(UserMetricsAction("SearchWithRLZ"));
else
UserMetrics::RecordAction(UserMetricsAction("SearchWithoutRLZ"));
}
return;
}
}
#endif
}
void TemplateURLRef::InvalidateCachedValues() const {
supports_replacements_ = valid_ = parsed_ = false;
host_.clear();
path_.clear();
search_term_key_.clear();
replacements_.clear();
}
// TemplateURL ----------------------------------------------------------------
// static
GURL TemplateURL::GenerateFaviconURL(const GURL& url) {
DCHECK(url.is_valid());
GURL::Replacements rep;
const char favicon_path[] = "/favicon.ico";
int favicon_path_len = arraysize(favicon_path) - 1;
rep.SetPath(favicon_path, url_parse::Component(0, favicon_path_len));
rep.ClearUsername();
rep.ClearPassword();
rep.ClearQuery();
rep.ClearRef();
return url.ReplaceComponents(rep);
}
// static
bool TemplateURL::SupportsReplacement(const TemplateURL* turl) {
UIThreadSearchTermsData search_terms_data;
return SupportsReplacementUsingTermsData(turl, search_terms_data);
}
// static
bool TemplateURL::SupportsReplacementUsingTermsData(
const TemplateURL* turl,
const SearchTermsData& search_terms_data) {
return turl && turl->url() &&
turl->url()->SupportsReplacementUsingTermsData(search_terms_data);
}
TemplateURL::TemplateURL()
: autogenerate_keyword_(false),
keyword_generated_(false),
show_in_default_list_(false),
safe_for_autoreplace_(false),
id_(0),
date_created_(base::Time::Now()),
created_by_policy_(false),
usage_count_(0),
search_engine_type_(SEARCH_ENGINE_OTHER),
logo_id_(kNoSearchEngineLogo),
prepopulate_id_(0) {
}
TemplateURL::~TemplateURL() {
}
string16 TemplateURL::AdjustedShortNameForLocaleDirection() const {
string16 bidi_safe_short_name = short_name_;
base::i18n::AdjustStringForLocaleDirection(&bidi_safe_short_name);
return bidi_safe_short_name;
}
void TemplateURL::SetSuggestionsURL(const std::string& suggestions_url,
int index_offset,
int page_offset) {
suggestions_url_.Set(suggestions_url, index_offset, page_offset);
}
void TemplateURL::SetURL(const std::string& url,
int index_offset,
int page_offset) {
url_.Set(url, index_offset, page_offset);
}
void TemplateURL::SetInstantURL(const std::string& url,
int index_offset,
int page_offset) {
instant_url_.Set(url, index_offset, page_offset);
}
void TemplateURL::set_keyword(const string16& keyword) {
// Case sensitive keyword matching is confusing. As such, we force all
// keywords to be lower case.
keyword_ = l10n_util::ToLower(keyword);
autogenerate_keyword_ = false;
}
string16 TemplateURL::keyword() const {
EnsureKeyword();
return keyword_;
}
void TemplateURL::EnsureKeyword() const {
if (autogenerate_keyword_ && !keyword_generated_) {
// Generate a keyword and cache it.
keyword_ = TemplateURLModel::GenerateKeyword(
TemplateURLModel::GenerateSearchURL(this).GetWithEmptyPath(), true);
keyword_generated_ = true;
}
}
bool TemplateURL::ShowInDefaultList() const {
return show_in_default_list() && url() && url()->SupportsReplacement();
}
void TemplateURL::SetFaviconURL(const GURL& url) {
for (std::vector<ImageRef>::iterator i = image_refs_.begin();
i != image_refs_.end(); ++i) {
if (i->type == "image/x-icon" &&
i->width == kFaviconSize && i->height == kFaviconSize) {
if (!url.is_valid())
image_refs_.erase(i);
else
i->url = url;
return;
}
}
// Don't have one yet, add it.
if (url.is_valid()) {
add_image_ref(
TemplateURL::ImageRef("image/x-icon", kFaviconSize,
kFaviconSize, url));
}
}
GURL TemplateURL::GetFaviconURL() const {
for (std::vector<ImageRef>::const_iterator i = image_refs_.begin();
i != image_refs_.end(); ++i) {
if ((i->type == "image/x-icon" || i->type == "image/vnd.microsoft.icon")
&& i->width == kFaviconSize && i->height == kFaviconSize) {
return i->url;
}
}
return GURL();
}
void TemplateURL::InvalidateCachedValues() const {
url_.InvalidateCachedValues();
suggestions_url_.InvalidateCachedValues();
if (autogenerate_keyword_) {
keyword_.clear();
keyword_generated_ = false;
}
}
std::string TemplateURL::GetExtensionId() const {
DCHECK(IsExtensionKeyword());
return GURL(url_.url()).host();
}
bool TemplateURL::IsExtensionKeyword() const {
return GURL(url_.url()).SchemeIs(chrome::kExtensionScheme);
}