| // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ |
| #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ |
| #pragma once |
| |
#include <set>
#include <string>
#include <vector>

#include "base/file_path.h"
#include "base/memory/scoped_ptr.h"
#include "base/synchronization/lock.h"
#include "base/task.h"
#include "chrome/browser/safe_browsing/safe_browsing_store.h"
#include "testing/gtest/include/gtest/gtest_prod.h"
| |
// Forward declarations of types that this header names without needing
// their full definitions.
namespace base {
class Time;
}  // namespace base

namespace safe_browsing {
class PrefixSet;
}  // namespace safe_browsing

class BloomFilter;
class GURL;
class MessageLoop;
class SafeBrowsingDatabase;
| |
| // Factory for creating SafeBrowsingDatabase. Tests implement this factory |
| // to create fake Databases for testing. |
| class SafeBrowsingDatabaseFactory { |
| public: |
| SafeBrowsingDatabaseFactory() { } |
| virtual ~SafeBrowsingDatabaseFactory() { } |
| virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( |
| bool enable_download_protection, |
| bool enable_client_side_whitelist) = 0; |
| private: |
| DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactory); |
| }; |
| |
| |
| // Encapsulates on-disk databases that for safebrowsing. There are |
| // three databases: browse, download and client-side detection (csd) |
| // whitelist databases. The browse database contains information |
| // about phishing and malware urls. The download database contains |
| // URLs for bad binaries (e.g: those containing virus) and hash of |
| // these downloaded contents. The csd whitelist database contains URLs |
| // that will never be considered as phishing by the client-side |
| // phishing detection. These on-disk databases are shared among all |
| // profiles, as it doesn't contain user-specific data. This object is |
| // not thread-safe, i.e. all its methods should be used on the same |
| // thread that it was created on. |
| class SafeBrowsingDatabase { |
| public: |
| // Factory method for obtaining a SafeBrowsingDatabase implementation. |
| // It is not thread safe. |
| // |enable_download_protection| is used to control the download database |
| // feature. |
| // |enable_client_side_whitelist| is used to control the csd whitelist |
| // database feature. |
| static SafeBrowsingDatabase* Create(bool enable_download_protection, |
| bool enable_client_side_whitelist); |
| |
| // Makes the passed |factory| the factory used to instantiate |
| // a SafeBrowsingDatabase. This is used for tests. |
| static void RegisterFactory(SafeBrowsingDatabaseFactory* factory) { |
| factory_ = factory; |
| } |
| |
| virtual ~SafeBrowsingDatabase(); |
| |
| // Initializes the database with the given filename. |
| virtual void Init(const FilePath& filename) = 0; |
| |
| // Deletes the current database and creates a new one. |
| virtual bool ResetDatabase() = 0; |
| |
| // Returns false if |url| is not in the browse database. If it |
| // returns true, then either |matching_list| is the name of the matching |
| // list, or |prefix_hits| and |full_hits| contains the matching hash |
| // prefixes. This function is safe to call from threads other than |
| // the creation thread. |
| virtual bool ContainsBrowseUrl(const GURL& url, |
| std::string* matching_list, |
| std::vector<SBPrefix>* prefix_hits, |
| std::vector<SBFullHashResult>* full_hits, |
| base::Time last_update) = 0; |
| |
| // Returns false if none of |urls| are in Download database. If it returns |
| // true, |prefix_hits| should contain the prefixes for the URLs that were in |
| // the database. This function could ONLY be accessed from creation thread. |
| virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, |
| std::vector<SBPrefix>* prefix_hits) = 0; |
| |
| // Returns false if |prefix| is not in Download database. |
| // This function could ONLY be accessed from creation thread. |
| virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix) = 0; |
| |
| // Returns false if |url| is not on the client-side phishing detection |
| // whitelist. Otherwise, this function returns true. Note: the whitelist |
| // only contains full-length hashes so we don't return any prefix hit. |
| // This function should only be called from the IO thread. |
| virtual bool ContainsCsdWhitelistedUrl(const GURL& url) = 0; |
| |
| // A database transaction should look like: |
| // |
| // std::vector<SBListChunkRanges> lists; |
| // if (db.UpdateStarted(&lists)) { |
| // // Do something with |lists|. |
| // |
| // // Process add/sub commands. |
| // db.InsertChunks(list_name, chunks); |
| // |
| // // Process adddel/subdel commands. |
| // db.DeleteChunks(chunks_deletes); |
| // |
| // // If passed true, processes the collected chunk info and |
| // // rebuilds the bloom filter. If passed false, rolls everything |
| // // back. |
| // db.UpdateFinished(success); |
| // } |
| // |
| // If UpdateStarted() returns true, the caller MUST eventually call |
| // UpdateFinished(). If it returns false, the caller MUST NOT call |
| // the other functions. |
| virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists) = 0; |
| virtual void InsertChunks(const std::string& list_name, |
| const SBChunkList& chunks) = 0; |
| virtual void DeleteChunks( |
| const std::vector<SBChunkDelete>& chunk_deletes) = 0; |
| virtual void UpdateFinished(bool update_succeeded) = 0; |
| |
| // Store the results of a GetHash response. In the case of empty results, we |
| // cache the prefixes until the next update so that we don't have to issue |
| // further GetHash requests we know will be empty. |
| virtual void CacheHashResults( |
| const std::vector<SBPrefix>& prefixes, |
| const std::vector<SBFullHashResult>& full_hits) = 0; |
| |
| // The name of the bloom-filter file for the given database file. |
| static FilePath BloomFilterForFilename(const FilePath& db_filename); |
| |
| // Filename for malware and phishing URL database. |
| static FilePath BrowseDBFilename(const FilePath& db_base_filename); |
| |
| // Filename for download URL and download binary hash database. |
| static FilePath DownloadDBFilename(const FilePath& db_base_filename); |
| |
| // Filename for client-side phishing detection whitelist databsae. |
| static FilePath CsdWhitelistDBFilename( |
| const FilePath& csd_whitelist_base_filename); |
| |
| // Enumerate failures for histogramming purposes. DO NOT CHANGE THE |
| // ORDERING OF THESE VALUES. |
| enum FailureType { |
| FAILURE_DATABASE_CORRUPT, |
| FAILURE_DATABASE_CORRUPT_HANDLER, |
| FAILURE_BROWSE_DATABASE_UPDATE_BEGIN, |
| FAILURE_BROWSE_DATABASE_UPDATE_FINISH, |
| FAILURE_DATABASE_FILTER_MISSING, |
| FAILURE_DATABASE_FILTER_READ, |
| FAILURE_DATABASE_FILTER_WRITE, |
| FAILURE_DATABASE_FILTER_DELETE, |
| FAILURE_DATABASE_STORE_MISSING, |
| FAILURE_DATABASE_STORE_DELETE, |
| FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN, |
| FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH, |
| FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN, |
| FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH, |
| |
| // Memory space for histograms is determined by the max. ALWAYS |
| // ADD NEW VALUES BEFORE THIS ONE. |
| FAILURE_DATABASE_MAX |
| }; |
| |
| static void RecordFailure(FailureType failure_type); |
| |
| private: |
| // The factory used to instantiate a SafeBrowsingDatabase object. |
| // Useful for tests, so they can provide their own implementation of |
| // SafeBrowsingDatabase. |
| static SafeBrowsingDatabaseFactory* factory_; |
| }; |
| |
| class SafeBrowsingDatabaseNew : public SafeBrowsingDatabase { |
| public: |
| // Create a database with a browse store, download store and |
| // csd_whitelist_store. Takes ownership of browse_store, download_store and |
| // csd_whitelist_store. When |download_store| is NULL, the database |
| // will ignore any operations related download (url hashes and |
| // binary hashes). Same for the |csd_whitelist_store|. |
| SafeBrowsingDatabaseNew(SafeBrowsingStore* browse_store, |
| SafeBrowsingStore* download_store, |
| SafeBrowsingStore* csd_whitelist_store); |
| |
| // Create a database with a browse store. This is a legacy interface that |
| // useds Sqlite. |
| SafeBrowsingDatabaseNew(); |
| |
| virtual ~SafeBrowsingDatabaseNew(); |
| |
| // Implement SafeBrowsingDatabase interface. |
| virtual void Init(const FilePath& filename); |
| virtual bool ResetDatabase(); |
| virtual bool ContainsBrowseUrl(const GURL& url, |
| std::string* matching_list, |
| std::vector<SBPrefix>* prefix_hits, |
| std::vector<SBFullHashResult>* full_hits, |
| base::Time last_update); |
| virtual bool ContainsDownloadUrl(const std::vector<GURL>& urls, |
| std::vector<SBPrefix>* prefix_hits); |
| virtual bool ContainsDownloadHashPrefix(const SBPrefix& prefix); |
| virtual bool ContainsCsdWhitelistedUrl(const GURL& url); |
| virtual bool UpdateStarted(std::vector<SBListChunkRanges>* lists); |
| virtual void InsertChunks(const std::string& list_name, |
| const SBChunkList& chunks); |
| virtual void DeleteChunks(const std::vector<SBChunkDelete>& chunk_deletes); |
| virtual void UpdateFinished(bool update_succeeded); |
| virtual void CacheHashResults(const std::vector<SBPrefix>& prefixes, |
| const std::vector<SBFullHashResult>& full_hits); |
| |
| private: |
| friend class SafeBrowsingDatabaseTest; |
| FRIEND_TEST(SafeBrowsingDatabaseTest, HashCaching); |
| |
| // Return the browse_store_, download_store_ or csd_whitelist_store_ |
| // based on list_id. |
| SafeBrowsingStore* GetStore(int list_id); |
| |
| // Deletes the files on disk. |
| bool Delete(); |
| |
| // Load the bloom filter off disk, or generates one if it doesn't exist. |
| void LoadBloomFilter(); |
| |
| // Writes the current bloom filter to disk. |
| void WriteBloomFilter(); |
| |
| // Loads the given full-length hashes to the csd whitelist. If the number |
| // of hashes is too large or if the kill switch URL is on the whitelist |
| // we will whitelist all URLs. |
| void LoadCsdWhitelist(const std::vector<SBAddFullHash>& full_hashes); |
| |
| // Call this method if an error occured with the csd whitelist. This will |
| // result in all calls to ContainsCsdWhitelistedUrl() to returning true. |
| void CsdWhitelistAllUrls(); |
| |
| // Helpers for handling database corruption. |
| // |OnHandleCorruptDatabase()| runs |ResetDatabase()| and sets |
| // |corruption_detected_|, |HandleCorruptDatabase()| posts |
| // |OnHandleCorruptDatabase()| to the current thread, to be run |
| // after the current task completes. |
| // TODO(shess): Wire things up to entirely abort the update |
| // transaction when this happens. |
| void HandleCorruptDatabase(); |
| void OnHandleCorruptDatabase(); |
| |
| // Helpers for InsertChunks(). |
| void InsertAdd(int chunk, SBPrefix host, const SBEntry* entry, int list_id); |
| void InsertAddChunks(int list_id, const SBChunkList& chunks); |
| void InsertSub(int chunk, SBPrefix host, const SBEntry* entry, int list_id); |
| void InsertSubChunks(int list_id, const SBChunkList& chunks); |
| |
| void UpdateDownloadStore(); |
| void UpdateBrowseStore(); |
| void UpdateCsdWhitelistStore(); |
| |
| // Helper function to compare addprefixes in download_store_ with |prefixes|. |
| // The |list_bit| indicates which list (download url or download hash) |
| // to compare. |
| // Returns true if there is a match, |*prefix_hits| will contain the actual |
| // matching prefixes. |
| bool MatchDownloadAddPrefixes(int list_bit, |
| const std::vector<SBPrefix>& prefixes, |
| std::vector<SBPrefix>* prefix_hits); |
| |
| // Used to verify that various calls are made from the thread the |
| // object was created on. |
| MessageLoop* creation_loop_; |
| |
| // Lock for protecting access to variables that may be used on the |
| // IO thread. This includes |browse_bloom_filter_|, |full_browse_hashes_|, |
| // |pending_browse_hashes_|, |prefix_miss_cache_|, |csd_whitelist_|, and |
| // |csd_whitelist_all_urls_|. |
| base::Lock lookup_lock_; |
| |
| // Underlying persistent store for chunk data. |
| // For browsing related (phishing and malware URLs) chunks and prefixes. |
| FilePath browse_filename_; |
| scoped_ptr<SafeBrowsingStore> browse_store_; |
| |
| // For download related (download URL and binary hash) chunks and prefixes. |
| FilePath download_filename_; |
| scoped_ptr<SafeBrowsingStore> download_store_; |
| |
| // For the client-side phishing detection whitelist chunks and full-length |
| // hashes. This list only contains 256 bit hashes. |
| FilePath csd_whitelist_filename_; |
| scoped_ptr<SafeBrowsingStore> csd_whitelist_store_; |
| |
| // All the client-side phishing detection whitelist entries are loaded in |
| // a sorted vector. |
| std::vector<SBFullHash> csd_whitelist_; |
| |
| // If true, ContainsCsdWhitelistedUrl will always return true for all URLs. |
| // This is set to true if the csd whitelist is too large to be stored in |
| // memory, if the kill switch URL is on the csd whitelist or if there was |
| // an error during the most recent update. |
| bool csd_whitelist_all_urls_; |
| |
| // Bloom filter generated from the add-prefixes in |browse_store_|. |
| // Only browse_store_ requires the BloomFilter for fast query. |
| FilePath bloom_filter_filename_; |
| scoped_refptr<BloomFilter> browse_bloom_filter_; |
| |
| // Cached browse store related full-hash items, ordered by prefix for |
| // efficient scanning. |
| // |full_browse_hashes_| are items from |browse_store_|, |
| // |pending_browse_hashes_| are items from |CacheHashResults()|, which |
| // will be pushed to the store on the next update. |
| std::vector<SBAddFullHash> full_browse_hashes_; |
| std::vector<SBAddFullHash> pending_browse_hashes_; |
| |
| // Cache of prefixes that returned empty results (no full hash |
| // match) to |CacheHashResults()|. Cached to prevent asking for |
| // them every time. Cleared on next update. |
| std::set<SBPrefix> prefix_miss_cache_; |
| |
| // Used to schedule resetting the database because of corruption. |
| ScopedRunnableMethodFactory<SafeBrowsingDatabaseNew> reset_factory_; |
| |
| // Set if corruption is detected during the course of an update. |
| // Causes the update functions to fail with no side effects, until |
| // the next call to |UpdateStarted()|. |
| bool corruption_detected_; |
| |
| // Set to true if any chunks are added or deleted during an update. |
| // Used to optimize away database update. |
| bool change_detected_; |
| |
| // Used to check if a prefix was in the database. |
| scoped_ptr<safe_browsing::PrefixSet> prefix_set_; |
| }; |
| |
| #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_DATABASE_H_ |