chrome/browser/bookmarks/bookmark_index.cc - platform/external/chromium - Git at Google

 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "chrome/browser/bookmarks/bookmark_index.h"

 #include <algorithm>
 #include <iterator>
 #include <list>

 #include "base/string16.h"
 #include "chrome/browser/bookmarks/bookmark_model.h"
 #include "chrome/browser/bookmarks/bookmark_utils.h"
 #include "chrome/browser/history/history_database.h"
 #include "chrome/browser/history/query_parser.h"
 #include "chrome/browser/profiles/profile.h"
 #include "ui/base/l10n/l10n_util.h"

 // Used when finding the set of bookmarks that match a query. Each match
 // represents a set of terms (as an interator into the Index) matching the
 // query as well as the set of nodes that contain those terms in their titles.
 struct BookmarkIndex::Match {
   // List of terms matching the query.
   std::list<Index::const_iterator> terms;

   // The set of nodes matching the terms. As an optimization this is empty
   // when we match only one term, and is filled in when we get more than one
   // term. We can do this as when we have only one matching term we know
   // the set of matching nodes is terms.front()->second.
   //
   // Use nodes_begin() and nodes_end() to get an iterator over the set as
   // it handles the necessary switching between nodes and terms.front().
   NodeSet nodes;

   // Returns an iterator to the beginning of the matching nodes. See
   // description of nodes for why this should be used over nodes.begin().
   NodeSet::const_iterator nodes_begin() const;

   // Returns an iterator to the beginning of the matching nodes. See
   // description of nodes for why this should be used over nodes.end().
   NodeSet::const_iterator nodes_end() const;
 };

 BookmarkIndex::NodeSet::const_iterator
     BookmarkIndex::Match::nodes_begin() const {
   return nodes.empty() ? terms.front()->second.begin() : nodes.begin();
 }

 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {
   return nodes.empty() ? terms.front()->second.end() : nodes.end();
 }

 BookmarkIndex::BookmarkIndex(Profile* profile) : profile_(profile) {
 }

 BookmarkIndex::~BookmarkIndex() {
 }

 void BookmarkIndex::Add(const BookmarkNode* node) {
   if (!node->is_url())
     return;
   std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
   for (size_t i = 0; i < terms.size(); ++i)
     RegisterNode(terms[i], node);
 }

 void BookmarkIndex::Remove(const BookmarkNode* node) {
   if (!node->is_url())
     return;

   std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
   for (size_t i = 0; i < terms.size(); ++i)
     UnregisterNode(terms[i], node);
 }

 void BookmarkIndex::GetBookmarksWithTitlesMatching(
     const string16& query,
     size_t max_count,
     std::vector<bookmark_utils::TitleMatch>* results) {
   std::vector<string16> terms = ExtractQueryWords(query);
   if (terms.empty())
     return;

   Matches matches;
   for (size_t i = 0; i < terms.size(); ++i) {
     if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches))
       return;
   }

   NodeTypedCountPairs node_typed_counts;
   SortMatches(matches, &node_typed_counts);

   // We use a QueryParser to fill in match positions for us. It's not the most
   // efficient way to go about this, but by the time we get here we know what
   // matches and so this shouldn't be performance critical.
   QueryParser parser;
   ScopedVector<QueryNode> query_nodes;
   parser.ParseQuery(query, &query_nodes.get());

   // The highest typed counts should be at the beginning of the results vector
   // so that the best matches will always be included in the results. The loop
   // that calculates result relevance in HistoryContentsProvider::ConvertResults
   // will run backwards to assure higher relevance will be attributed to the
   // best matches.
   for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin();
        i != node_typed_counts.end() && results->size() < max_count; ++i)
     AddMatchToResults(i->first, &parser, query_nodes.get(), results);
 }

 void BookmarkIndex::SortMatches(const Matches& matches,
                                 NodeTypedCountPairs* node_typed_counts) const {
   HistoryService* const history_service = profile_ ?
       profile_->GetHistoryService(Profile::EXPLICIT_ACCESS) : NULL;

   history::URLDatabase* url_db = history_service ?
       history_service->InMemoryDatabase() : NULL;

   for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i)
     ExtractBookmarkNodePairs(url_db, *i, node_typed_counts);

   std::sort(node_typed_counts->begin(), node_typed_counts->end(),
             &NodeTypedCountPairSortFunc);
 }

 void BookmarkIndex::ExtractBookmarkNodePairs(
     history::URLDatabase* url_db,
     const Match& match,
     NodeTypedCountPairs* node_typed_counts) const {

   for (NodeSet::const_iterator i = match.nodes_begin();
        i != match.nodes_end(); ++i) {
     history::URLRow url;
     if (url_db)
       url_db->GetRowForURL((*i)->GetURL(), &url);
     NodeTypedCountPair pair(*i, url.typed_count());
     node_typed_counts->push_back(pair);
   }
 }

 void BookmarkIndex::AddMatchToResults(
     const BookmarkNode* node,
     QueryParser* parser,
     const std::vector<QueryNode*>& query_nodes,
     std::vector<bookmark_utils::TitleMatch>* results) {
   bookmark_utils::TitleMatch title_match;
   // Check that the result matches the query.  The previous search
   // was a simple per-word search, while the more complex matching
   // of QueryParser may filter it out.  For example, the query
   // ["thi"] will match the bookmark titled [Thinking], but since
   // ["thi"] is quoted we don't want to do a prefix match.
   if (parser->DoesQueryMatch(node->GetTitle(), query_nodes,
                              &(title_match.match_positions))) {
     title_match.node = node;
     results->push_back(title_match);
   }
 }

 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const string16& term,
                                                       bool first_term,
                                                       Matches* matches) {
   Index::const_iterator i = index_.lower_bound(term);
   if (i == index_.end())
     return false;

   if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) {
     // Term is too short for prefix match, compare using exact match.
     if (i->first != term)
       return false;  // No bookmarks with this term.

     if (first_term) {
       Match match;
       match.terms.push_back(i);
       matches->push_back(match);
       return true;
     }
     CombineMatchesInPlace(i, matches);
   } else if (first_term) {
     // This is the first term and we're doing a prefix match. Loop through
     // index adding all entries that start with term to matches.
     while (i != index_.end() &&
            i->first.size() >= term.size() &&
            term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
       Match match;
       match.terms.push_back(i);
       matches->push_back(match);
       ++i;
     }
   } else {
     // Prefix match and not the first term. Loop through index combining
     // current matches in matches with term, placing result in result.
     Matches result;
     while (i != index_.end() &&
            i->first.size() >= term.size() &&
            term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
       CombineMatches(i, *matches, &result);
       ++i;
     }
     matches->swap(result);
   }
   return !matches->empty();
 }

 void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,
                                           Matches* matches) {
   for (size_t i = 0; i < matches->size(); ) {
     Match* match = &((*matches)[i]);
     NodeSet intersection;
     std::set_intersection(match->nodes_begin(), match->nodes_end(),
                           index_i->second.begin(), index_i->second.end(),
                           std::inserter(intersection, intersection.begin()));
     if (intersection.empty()) {
       matches->erase(matches->begin() + i);
     } else {
       match->terms.push_back(index_i);
       match->nodes.swap(intersection);
       ++i;
     }
   }
 }

 void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,
                                    const Matches& current_matches,
                                    Matches* result) {
   for (size_t i = 0; i < current_matches.size(); ++i) {
     const Match& match = current_matches[i];
     NodeSet intersection;
     std::set_intersection(match.nodes_begin(), match.nodes_end(),
                           index_i->second.begin(), index_i->second.end(),
                           std::inserter(intersection, intersection.begin()));
     if (!intersection.empty()) {
       result->push_back(Match());
       Match& combined_match = result->back();
       combined_match.terms = match.terms;
       combined_match.terms.push_back(index_i);
       combined_match.nodes.swap(intersection);
     }
   }
 }

 std::vector<string16> BookmarkIndex::ExtractQueryWords(const string16& query) {
   std::vector<string16> terms;
   if (query.empty())
     return std::vector<string16>();
   QueryParser parser;
   // TODO: use ICU normalization.
   parser.ExtractQueryWords(l10n_util::ToLower(query), &terms);
   return terms;
 }

 void BookmarkIndex::RegisterNode(const string16& term,
                                  const BookmarkNode* node) {
   if (std::find(index_[term].begin(), index_[term].end(), node) !=
       index_[term].end()) {
     // We've already added node for term.
     return;
   }
   index_[term].insert(node);
 }

 void BookmarkIndex::UnregisterNode(const string16& term,
                                    const BookmarkNode* node) {
   Index::iterator i = index_.find(term);
   if (i == index_.end()) {
     // We can get here if the node has the same term more than once. For
     // example, a bookmark with the title 'foo foo' would end up here.
     return;
   }
   i->second.erase(node);
   if (i->second.empty())
     index_.erase(i);
 }
	// Copyright (c) 2010 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "chrome/browser/bookmarks/bookmark_index.h"

	#include <algorithm>
	#include <iterator>
	#include <list>

	#include "base/string16.h"
	#include "chrome/browser/bookmarks/bookmark_model.h"
	#include "chrome/browser/bookmarks/bookmark_utils.h"
	#include "chrome/browser/history/history_database.h"
	#include "chrome/browser/history/query_parser.h"
	#include "chrome/browser/profiles/profile.h"
	#include "ui/base/l10n/l10n_util.h"

	// Used when finding the set of bookmarks that match a query. Each match
	// represents a set of terms (as an interator into the Index) matching the
	// query as well as the set of nodes that contain those terms in their titles.
	struct BookmarkIndex::Match {
	// List of terms matching the query.
	std::list<Index::const_iterator> terms;

	// The set of nodes matching the terms. As an optimization this is empty
	// when we match only one term, and is filled in when we get more than one
	// term. We can do this as when we have only one matching term we know
	// the set of matching nodes is terms.front()->second.
	//
	// Use nodes_begin() and nodes_end() to get an iterator over the set as
	// it handles the necessary switching between nodes and terms.front().
	NodeSet nodes;

	// Returns an iterator to the beginning of the matching nodes. See
	// description of nodes for why this should be used over nodes.begin().
	NodeSet::const_iterator nodes_begin() const;

	// Returns an iterator to the beginning of the matching nodes. See
	// description of nodes for why this should be used over nodes.end().
	NodeSet::const_iterator nodes_end() const;
	};

	BookmarkIndex::NodeSet::const_iterator
	BookmarkIndex::Match::nodes_begin() const {
	return nodes.empty() ? terms.front()->second.begin() : nodes.begin();
	}

	BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {
	return nodes.empty() ? terms.front()->second.end() : nodes.end();
	}

	BookmarkIndex::BookmarkIndex(Profile* profile) : profile_(profile) {
	}

	BookmarkIndex::~BookmarkIndex() {
	}

	void BookmarkIndex::Add(const BookmarkNode* node) {
	if (!node->is_url())
	return;
	std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
	for (size_t i = 0; i < terms.size(); ++i)
	RegisterNode(terms[i], node);
	}

	void BookmarkIndex::Remove(const BookmarkNode* node) {
	if (!node->is_url())
	return;

	std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
	for (size_t i = 0; i < terms.size(); ++i)
	UnregisterNode(terms[i], node);
	}

	void BookmarkIndex::GetBookmarksWithTitlesMatching(
	const string16& query,
	size_t max_count,
	std::vector<bookmark_utils::TitleMatch>* results) {
	std::vector<string16> terms = ExtractQueryWords(query);
	if (terms.empty())
	return;

	Matches matches;
	for (size_t i = 0; i < terms.size(); ++i) {
	if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches))
	return;
	}

	NodeTypedCountPairs node_typed_counts;
	SortMatches(matches, &node_typed_counts);

	// We use a QueryParser to fill in match positions for us. It's not the most
	// efficient way to go about this, but by the time we get here we know what
	// matches and so this shouldn't be performance critical.
	QueryParser parser;
	ScopedVector<QueryNode> query_nodes;
	parser.ParseQuery(query, &query_nodes.get());

	// The highest typed counts should be at the beginning of the results vector
	// so that the best matches will always be included in the results. The loop
	// that calculates result relevance in HistoryContentsProvider::ConvertResults
	// will run backwards to assure higher relevance will be attributed to the
	// best matches.
	for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin();
	i != node_typed_counts.end() && results->size() < max_count; ++i)
	AddMatchToResults(i->first, &parser, query_nodes.get(), results);
	}

	void BookmarkIndex::SortMatches(const Matches& matches,
	NodeTypedCountPairs* node_typed_counts) const {
	HistoryService* const history_service = profile_ ?
	profile_->GetHistoryService(Profile::EXPLICIT_ACCESS) : NULL;

	history::URLDatabase* url_db = history_service ?
	history_service->InMemoryDatabase() : NULL;

	for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i)
	ExtractBookmarkNodePairs(url_db, *i, node_typed_counts);

	std::sort(node_typed_counts->begin(), node_typed_counts->end(),
	&NodeTypedCountPairSortFunc);
	}

	void BookmarkIndex::ExtractBookmarkNodePairs(
	history::URLDatabase* url_db,
	const Match& match,
	NodeTypedCountPairs* node_typed_counts) const {

	for (NodeSet::const_iterator i = match.nodes_begin();
	i != match.nodes_end(); ++i) {
	history::URLRow url;
	if (url_db)
	url_db->GetRowForURL((*i)->GetURL(), &url);
	NodeTypedCountPair pair(*i, url.typed_count());
	node_typed_counts->push_back(pair);
	}
	}

	void BookmarkIndex::AddMatchToResults(
	const BookmarkNode* node,
	QueryParser* parser,
	const std::vector<QueryNode*>& query_nodes,
	std::vector<bookmark_utils::TitleMatch>* results) {
	bookmark_utils::TitleMatch title_match;
	// Check that the result matches the query. The previous search
	// was a simple per-word search, while the more complex matching
	// of QueryParser may filter it out. For example, the query
	// ["thi"] will match the bookmark titled [Thinking], but since
	// ["thi"] is quoted we don't want to do a prefix match.
	if (parser->DoesQueryMatch(node->GetTitle(), query_nodes,
	&(title_match.match_positions))) {
	title_match.node = node;
	results->push_back(title_match);
	}
	}

	bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const string16& term,
	bool first_term,
	Matches* matches) {
	Index::const_iterator i = index_.lower_bound(term);
	if (i == index_.end())
	return false;

	if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) {
	// Term is too short for prefix match, compare using exact match.
	if (i->first != term)
	return false; // No bookmarks with this term.

	if (first_term) {
	Match match;
	match.terms.push_back(i);
	matches->push_back(match);
	return true;
	}
	CombineMatchesInPlace(i, matches);
	} else if (first_term) {
	// This is the first term and we're doing a prefix match. Loop through
	// index adding all entries that start with term to matches.
	while (i != index_.end() &&
	i->first.size() >= term.size() &&
	term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
	Match match;
	match.terms.push_back(i);
	matches->push_back(match);
	++i;
	}
	} else {
	// Prefix match and not the first term. Loop through index combining
	// current matches in matches with term, placing result in result.
	Matches result;
	while (i != index_.end() &&
	i->first.size() >= term.size() &&
	term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
	CombineMatches(i, *matches, &result);
	++i;
	}
	matches->swap(result);
	}
	return !matches->empty();
	}

	void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,
	Matches* matches) {
	for (size_t i = 0; i < matches->size(); ) {
	Match* match = &((*matches)[i]);
	NodeSet intersection;
	std::set_intersection(match->nodes_begin(), match->nodes_end(),
	index_i->second.begin(), index_i->second.end(),
	std::inserter(intersection, intersection.begin()));
	if (intersection.empty()) {
	matches->erase(matches->begin() + i);
	} else {
	match->terms.push_back(index_i);
	match->nodes.swap(intersection);
	++i;
	}
	}
	}

	void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,
	const Matches& current_matches,
	Matches* result) {
	for (size_t i = 0; i < current_matches.size(); ++i) {
	const Match& match = current_matches[i];
	NodeSet intersection;
	std::set_intersection(match.nodes_begin(), match.nodes_end(),
	index_i->second.begin(), index_i->second.end(),
	std::inserter(intersection, intersection.begin()));
	if (!intersection.empty()) {
	result->push_back(Match());
	Match& combined_match = result->back();
	combined_match.terms = match.terms;
	combined_match.terms.push_back(index_i);
	combined_match.nodes.swap(intersection);
	}
	}
	}

	std::vector<string16> BookmarkIndex::ExtractQueryWords(const string16& query) {
	std::vector<string16> terms;
	if (query.empty())
	return std::vector<string16>();
	QueryParser parser;
	// TODO: use ICU normalization.
	parser.ExtractQueryWords(l10n_util::ToLower(query), &terms);
	return terms;
	}

	void BookmarkIndex::RegisterNode(const string16& term,
	const BookmarkNode* node) {
	if (std::find(index_[term].begin(), index_[term].end(), node) !=
	index_[term].end()) {
	// We've already added node for term.
	return;
	}
	index_[term].insert(node);
	}

	void BookmarkIndex::UnregisterNode(const string16& term,
	const BookmarkNode* node) {
	Index::iterator i = index_.find(term);
	if (i == index_.end()) {
	// We can get here if the node has the same term more than once. For
	// example, a bookmark with the title 'foo foo' would end up here.
	return;
	}
	i->second.erase(node);
	if (i->second.empty())
	index_.erase(i);
	}