| // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/net/referrer.h" |
| |
| #include <limits.h> |
| |
| #include "base/compiler_specific.h" |
| #include "base/logging.h" |
| #include "base/message_loop.h" |
| #include "base/values.h" |
| #include "chrome/browser/net/predictor.h" |
| |
| namespace chrome_browser_net { |
| |
| //------------------------------------------------------------------------------ |
| // Smoothing parameter for updating subresource_use_rate_. |
| |
| // We always combine our old expected value, weighted by some factor W (we use |
| // kWeightingForOldConnectsExpectedValue), with the new expected value Enew. |
| // The new "expected value" is the number of actual connections made due to the |
| // current navigations. |
| // That means that IF we end up needing to connect, we should apply the formula: |
| // Eupdated = Eold * W + Enew * (1 - W) |
| // If we visit the containing url, but don't end up needing a connection, then |
| // Enew == 0, so we use the formula: |
| // Eupdated = Eold * W |
| // To achieve the above updating algorithm, we end up doing the multiplication |
| // by W every time we contemplate doing a preconnection (i.e., when we navigate |
| // to the containing URL, and consider doing a preconnection), and then IFF we |
| // learn that we really needed a connection to the subresource, we complete the |
| // above algorithm by adding the (1 - W) for each connection we make. |
| |
| // We weight the new expected value by a factor which is in the range of 0.0 to |
| // 1.0. |
| static const double kWeightingForOldConnectsExpectedValue = 0.66; |
| |
| // To estimate the expected value of the number of connections that we'll need |
| // when a referrer is navigated to, we start with the following low initial |
| // value. |
| // Each time we do indeed (again) need the subresource, this value will get |
| // increased. |
| // Each time we navigate to the refererrer but never end up needing this |
| // subresource, the value will decrease. |
| // Very conservative is 0.0, which will mean that we have to wait for a while |
| // before doing much speculative acvtivity. We do persist results, so we'll |
| // save the asymptotic (correct?) learned answer in the long run. |
| // Some browsers blindly make 2 connections all the time, so we'll use that as |
| // a starting point. |
| static const double kInitialConnectsExpectedValue = 2.0; |
| |
| Referrer::Referrer() : use_count_(1) {} |
| |
| void Referrer::SuggestHost(const GURL& url) { |
| // Limit how large our list can get, in case we make mistakes about what |
| // hostnames are in sub-resources (example: Some advertisments have a link to |
| // the ad agency, and then provide a "surprising" redirect to the advertised |
| // entity, which then (mistakenly) appears to be a subresource on the page |
| // hosting the ad). |
| // TODO(jar): Do experiments to optimize the max count of suggestions. |
| static const size_t kMaxSuggestions = 10; |
| |
| if (!url.has_host()) // TODO(jar): Is this really needed???? |
| return; |
| DCHECK(url == url.GetWithEmptyPath()); |
| SubresourceMap::iterator it = find(url); |
| if (it != end()) { |
| it->second.SubresourceIsNeeded(); |
| return; |
| } |
| |
| if (kMaxSuggestions <= size()) { |
| DeleteLeastUseful(); |
| DCHECK(kMaxSuggestions > size()); |
| } |
| (*this)[url].SubresourceIsNeeded(); |
| } |
| |
| void Referrer::DeleteLeastUseful() { |
| // Find the item with the lowest value. Most important is preconnection_rate, |
| // and least is lifetime (age). |
| GURL least_useful_url; |
| double lowest_rate_seen = 0.0; |
| // We use longs for durations because we will use multiplication on them. |
| int64 least_useful_lifetime = 0; // Duration in milliseconds. |
| |
| const base::Time kNow(base::Time::Now()); // Avoid multiple calls. |
| for (SubresourceMap::iterator it = begin(); it != end(); ++it) { |
| int64 lifetime = (kNow - it->second.birth_time()).InMilliseconds(); |
| double rate = it->second.subresource_use_rate(); |
| if (least_useful_url.has_host()) { |
| if (rate > lowest_rate_seen) |
| continue; |
| if (lifetime <= least_useful_lifetime) |
| continue; |
| } |
| least_useful_url = it->first; |
| lowest_rate_seen = rate; |
| least_useful_lifetime = lifetime; |
| } |
| if (least_useful_url.has_host()) |
| erase(least_useful_url); |
| } |
| |
| bool Referrer::Trim(double reduce_rate, double threshold) { |
| std::vector<GURL> discarded_urls; |
| for (SubresourceMap::iterator it = begin(); it != end(); ++it) { |
| if (!it->second.Trim(reduce_rate, threshold)) |
| discarded_urls.push_back(it->first); |
| } |
| for (size_t i = 0; i < discarded_urls.size(); ++i) |
| erase(discarded_urls[i]); |
| return size() > 0; |
| } |
| |
| bool ReferrerValue::Trim(double reduce_rate, double threshold) { |
| subresource_use_rate_ *= reduce_rate; |
| return subresource_use_rate_ > threshold; |
| } |
| |
| |
| void Referrer::Deserialize(const Value& value) { |
| if (value.GetType() != Value::TYPE_LIST) |
| return; |
| const ListValue* subresource_list(static_cast<const ListValue*>(&value)); |
| size_t index = 0; // Bounds checking is done by subresource_list->Get*(). |
| while (true) { |
| std::string url_spec; |
| if (!subresource_list->GetString(index++, &url_spec)) |
| return; |
| double rate; |
| if (!subresource_list->GetDouble(index++, &rate)) |
| return; |
| |
| GURL url(url_spec); |
| // TODO(jar): We could be more direct, and change birth date or similar to |
| // show that this is a resurrected value we're adding in. I'm not yet sure |
| // of how best to optimize the learning and pruning (Trim) algorithm at this |
| // level, so for now, we just suggest subresources, which leaves them all |
| // with the same birth date (typically start of process). |
| SuggestHost(url); |
| (*this)[url].SetSubresourceUseRate(rate); |
| } |
| } |
| |
| Value* Referrer::Serialize() const { |
| ListValue* subresource_list(new ListValue); |
| for (const_iterator it = begin(); it != end(); ++it) { |
| StringValue* url_spec(new StringValue(it->first.spec())); |
| FundamentalValue* rate(new FundamentalValue( |
| it->second.subresource_use_rate())); |
| |
| subresource_list->Append(url_spec); |
| subresource_list->Append(rate); |
| } |
| return subresource_list; |
| } |
| |
| //------------------------------------------------------------------------------ |
| |
| ReferrerValue::ReferrerValue() |
| : birth_time_(base::Time::Now()), |
| navigation_count_(0), |
| preconnection_count_(0), |
| preresolution_count_(0), |
| subresource_use_rate_(kInitialConnectsExpectedValue) { |
| } |
| |
| void ReferrerValue::SubresourceIsNeeded() { |
| DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0); |
| DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0); |
| ++navigation_count_; |
| subresource_use_rate_ += 1 - kWeightingForOldConnectsExpectedValue; |
| } |
| |
| void ReferrerValue::ReferrerWasObserved() { |
| subresource_use_rate_ *= kWeightingForOldConnectsExpectedValue; |
| // Note: the use rate is temporarilly possibly incorect, as we need to find |
| // out if we really end up connecting. This will happen in a few hundred |
| // milliseconds (when content arrives, etc.). |
| // Value of subresource_use_rate_ should be sampled before this call. |
| } |
| |
| } // namespace chrome_browser_net |