| // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // Parse the data returned from the SafeBrowsing v2.1 protocol response. |
| |
| #include <stdlib.h> |
| |
| #include "chrome/browser/safe_browsing/protocol_parser.h" |
| #include "chrome/browser/safe_browsing/safe_browsing_util.h" |
| |
| #include "build/build_config.h" |
| |
| #if defined(OS_WIN) |
| #include <Winsock2.h> |
| #elif defined(OS_POSIX) |
| #include <arpa/inet.h> |
| #endif |
| |
| #include "base/format_macros.h" |
| #include "base/logging.h" |
| #include "base/string_split.h" |
| #include "base/string_util.h" |
| |
namespace {

// Line scanner for the line-oriented parts of the protocol. Looks for the
// first '\n' within the first |input_len| bytes of |input|; when found, the
// bytes preceding it are copied into |line| and true is returned. When no
// newline lies inside that window the function returns false and |line| is
// left untouched.
//
// The copy uses std::string::assign(const charT* s, size_type n), which takes
// an explicit byte count instead of calling strlen. That matters because
// |input| is binary data: it is not NULL terminated and may legitimately
// contain NULL bytes that a strlen-based copy would truncate at.
bool GetLine(const char* input, int input_len, std::string* line) {
  if (!input)
    return false;

  for (int index = 0; index < input_len; ++index) {
    if (input[index] == '\n') {
      line->assign(input, index);
      return true;
    }
  }
  return false;
}

}  // namespace
| |
| //------------------------------------------------------------------------------ |
// SafeBrowsingProtocolParser implementation
| |
| SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() { |
| } |
| |
| bool SafeBrowsingProtocolParser::ParseGetHash( |
| const char* chunk_data, |
| int chunk_len, |
| const std::string& key, |
| bool* re_key, |
| std::vector<SBFullHashResult>* full_hashes) { |
| full_hashes->clear(); |
| int length = chunk_len; |
| const char* data = chunk_data; |
| |
| int offset; |
| std::string line; |
| if (!key.empty()) { |
| if (!GetLine(data, length, &line)) |
| return false; // Error! Bad GetHash result. |
| |
| if (line == "e:pleaserekey") { |
| *re_key = true; |
| return true; |
| } |
| |
| offset = static_cast<int>(line.size()) + 1; |
| data += offset; |
| length -= offset; |
| |
| if (!safe_browsing_util::VerifyMAC(key, line, data, length)) |
| return false; |
| } |
| |
| while (length > 0) { |
| if (!GetLine(data, length, &line)) |
| return false; |
| |
| offset = static_cast<int>(line.size()) + 1; |
| data += offset; |
| length -= offset; |
| |
| std::vector<std::string> cmd_parts; |
| base::SplitString(line, ':', &cmd_parts); |
| if (cmd_parts.size() != 3) |
| return false; |
| |
| SBFullHashResult full_hash; |
| full_hash.list_name = cmd_parts[0]; |
| full_hash.add_chunk_id = atoi(cmd_parts[1].c_str()); |
| int full_hash_len = atoi(cmd_parts[2].c_str()); |
| |
| // Ignore hash results from lists we don't recognize. |
| if (safe_browsing_util::GetListId(full_hash.list_name) < 0) { |
| data += full_hash_len; |
| length -= full_hash_len; |
| continue; |
| } |
| |
| while (full_hash_len > 0) { |
| DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash)); |
| memcpy(&full_hash.hash, data, sizeof(SBFullHash)); |
| full_hashes->push_back(full_hash); |
| data += sizeof(SBFullHash); |
| length -= sizeof(SBFullHash); |
| full_hash_len -= sizeof(SBFullHash); |
| } |
| } |
| |
| return length == 0; |
| } |
| |
| void SafeBrowsingProtocolParser::FormatGetHash( |
| const std::vector<SBPrefix>& prefixes, std::string* request) { |
| DCHECK(request); |
| |
| // Format the request for GetHash. |
| request->append(StringPrintf("%" PRIuS ":%" PRIuS "\n", |
| sizeof(SBPrefix), |
| sizeof(SBPrefix) * prefixes.size())); |
| for (size_t i = 0; i < prefixes.size(); ++i) { |
| request->append(reinterpret_cast<const char*>(&prefixes[i]), |
| sizeof(SBPrefix)); |
| } |
| } |
| |
| bool SafeBrowsingProtocolParser::ParseUpdate( |
| const char* chunk_data, |
| int chunk_len, |
| const std::string& key, |
| int* next_update_sec, |
| bool* re_key, |
| bool* reset, |
| std::vector<SBChunkDelete>* deletes, |
| std::vector<ChunkUrl>* chunk_urls) { |
| DCHECK(next_update_sec); |
| DCHECK(deletes); |
| DCHECK(chunk_urls); |
| |
| int length = chunk_len; |
| const char* data = chunk_data; |
| |
| // Populated below. |
| std::string list_name; |
| |
| while (length > 0) { |
| std::string cmd_line; |
| if (!GetLine(data, length, &cmd_line)) |
| return false; // Error: bad list format! |
| |
| std::vector<std::string> cmd_parts; |
| base::SplitString(cmd_line, ':', &cmd_parts); |
| if (cmd_parts.empty()) |
| return false; |
| const std::string& command = cmd_parts[0]; |
| if (cmd_parts.size() != 2 && command[0] != 'u') |
| return false; |
| |
| const int consumed = static_cast<int>(cmd_line.size()) + 1; |
| data += consumed; |
| length -= consumed; |
| if (length < 0) |
| return false; // Parsing error. |
| |
| // Differentiate on the first character of the command (which is usually |
| // only one character, with the exception of the 'ad' and 'sd' commands). |
| switch (command[0]) { |
| case 'a': |
| case 's': { |
| // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must |
| // have also parsed the list name before getting here, or the add-del |
| // or sub-del will have no context. |
| if (command.size() != 2 || command[1] != 'd' || list_name.empty()) |
| return false; |
| SBChunkDelete chunk_delete; |
| chunk_delete.is_sub_del = command[0] == 's'; |
| StringToRanges(cmd_parts[1], &chunk_delete.chunk_del); |
| chunk_delete.list_name = list_name; |
| deletes->push_back(chunk_delete); |
| break; |
| } |
| |
| case 'e': |
| if (cmd_parts[1] != "pleaserekey") |
| return false; |
| *re_key = true; |
| break; |
| |
| case 'i': |
| // The line providing the name of the list (i.e. 'goog-phish-shavar'). |
| list_name = cmd_parts[1]; |
| break; |
| |
| case 'm': |
| // Verify that the MAC of the remainer of this chunk is what we expect. |
| if (!key.empty() && |
| !safe_browsing_util::VerifyMAC(key, cmd_parts[1], data, length)) |
| return false; |
| break; |
| |
| case 'n': |
| // The line providing the next earliest time (in seconds) to re-query. |
| *next_update_sec = atoi(cmd_parts[1].c_str()); |
| break; |
| |
| case 'u': { |
| // The redirect command is of the form: u:<url>,<mac> where <url> can |
| // contain multiple colons, commas or any valid URL characters. We scan |
| // backwards in the string looking for the first ',' we encounter and |
| // assume that everything before that is the URL and everything after |
| // is the MAC (if the MAC was requested). |
| std::string mac; |
| std::string redirect_url(cmd_line, 2); // Skip the initial "u:". |
| if (!key.empty()) { |
| std::string::size_type mac_pos = redirect_url.rfind(','); |
| if (mac_pos == std::string::npos) |
| return false; |
| mac = redirect_url.substr(mac_pos + 1); |
| redirect_url = redirect_url.substr(0, mac_pos); |
| } |
| |
| ChunkUrl chunk_url; |
| chunk_url.url = redirect_url; |
| chunk_url.list_name = list_name; |
| if (!key.empty()) |
| chunk_url.mac = mac; |
| chunk_urls->push_back(chunk_url); |
| break; |
| } |
| |
| case 'r': |
| if (cmd_parts[1] != "pleasereset") |
| return false; |
| *reset = true; |
| break; |
| |
| default: |
| // According to the spec, we ignore commands we don't understand. |
| break; |
| } |
| } |
| |
| return true; |
| } |
| |
| bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name, |
| const char* data, |
| int length, |
| const std::string& key, |
| const std::string& mac, |
| bool* re_key, |
| SBChunkList* chunks) { |
| int remaining = length; |
| const char* chunk_data = data; |
| |
| if (!key.empty() && |
| !safe_browsing_util::VerifyMAC(key, mac, data, length)) { |
| return false; |
| } |
| |
| while (remaining > 0) { |
| std::string cmd_line; |
| if (!GetLine(chunk_data, length, &cmd_line)) |
| return false; // Error: bad chunk format! |
| |
| const int line_len = static_cast<int>(cmd_line.length()) + 1; |
| chunk_data += line_len; |
| remaining -= line_len; |
| std::vector<std::string> cmd_parts; |
| base::SplitString(cmd_line, ':', &cmd_parts); |
| |
| // Handle a possible re-key command. |
| if (cmd_parts.size() != 4) { |
| if (cmd_parts.size() == 2 && |
| cmd_parts[0] == "e" && |
| cmd_parts[1] == "pleaserekey") { |
| *re_key = true; |
| continue; |
| } |
| return false; |
| } |
| |
| // Process the chunk data. |
| const int chunk_number = atoi(cmd_parts[1].c_str()); |
| const int hash_len = atoi(cmd_parts[2].c_str()); |
| if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) { |
| VLOG(1) << "ParseChunk got unknown hashlen " << hash_len; |
| return false; |
| } |
| |
| const int chunk_len = atoi(cmd_parts[3].c_str()); |
| |
| if (remaining < chunk_len) |
| return false; // parse error. |
| |
| chunks->push_back(SBChunk()); |
| chunks->back().chunk_number = chunk_number; |
| |
| if (cmd_parts[0] == "a") { |
| chunks->back().is_add = true; |
| if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len, |
| &chunks->back().hosts)) |
| return false; // Parse error. |
| } else if (cmd_parts[0] == "s") { |
| chunks->back().is_add = false; |
| if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len, |
| &chunks->back().hosts)) |
| return false; // Parse error. |
| } else { |
| NOTREACHED(); |
| return false; |
| } |
| |
| chunk_data += chunk_len; |
| remaining -= chunk_len; |
| DCHECK_LE(0, remaining); |
| } |
| |
| DCHECK(remaining == 0); |
| |
| return true; |
| } |
| |
| bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name, |
| const char* data, |
| int data_len, |
| int hash_len, |
| std::deque<SBChunkHost>* hosts) { |
| const char* chunk_data = data; |
| int remaining = data_len; |
| int prefix_count; |
| SBEntry::Type type = hash_len == sizeof(SBPrefix) ? |
| SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH; |
| |
| if (list_name == safe_browsing_util::kBinHashList) { |
| // kBinHashList only contains prefixes, no HOSTKEY and COUNT. |
| DCHECK_EQ(0, remaining % hash_len); |
| prefix_count = remaining / hash_len; |
| SBChunkHost chunk_host; |
| chunk_host.host = 0; |
| chunk_host.entry = SBEntry::Create(type, prefix_count); |
| hosts->push_back(chunk_host); |
| if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count)) |
| return false; |
| } else { |
| SBPrefix host; |
| const int min_size = sizeof(SBPrefix) + 1; |
| while (remaining >= min_size) { |
| ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count); |
| SBChunkHost chunk_host; |
| chunk_host.host = host; |
| chunk_host.entry = SBEntry::Create(type, prefix_count); |
| hosts->push_back(chunk_host); |
| if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, |
| prefix_count)) |
| return false; |
| } |
| } |
| return remaining == 0; |
| } |
| |
| bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name, |
| const char* data, |
| int data_len, |
| int hash_len, |
| std::deque<SBChunkHost>* hosts) { |
| int remaining = data_len; |
| const char* chunk_data = data; |
| int prefix_count; |
| SBEntry::Type type = hash_len == sizeof(SBPrefix) ? |
| SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH; |
| |
| if (list_name == safe_browsing_util::kBinHashList) { |
| SBChunkHost chunk_host; |
| // Set host to 0 and it won't be used for kBinHashList. |
| chunk_host.host = 0; |
| // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY |
| // and COUNT. |add_chunk_number| is int32. |
| prefix_count = remaining / (sizeof(int32) + hash_len); |
| chunk_host.entry = SBEntry::Create(type, prefix_count); |
| if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count)) |
| return false; |
| hosts->push_back(chunk_host); |
| } else { |
| SBPrefix host; |
| const int min_size = 2 * sizeof(SBPrefix) + 1; |
| while (remaining >= min_size) { |
| ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count); |
| SBChunkHost chunk_host; |
| chunk_host.host = host; |
| chunk_host.entry = SBEntry::Create(type, prefix_count); |
| hosts->push_back(chunk_host); |
| if (prefix_count == 0) { |
| // There is only an add chunk number (no prefixes). |
| chunk_host.entry->set_chunk_id(ReadChunkId(&chunk_data, &remaining)); |
| continue; |
| } |
| if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, |
| prefix_count)) |
| return false; |
| } |
| } |
| return remaining == 0; |
| } |
| |
| void SafeBrowsingProtocolParser::ReadHostAndPrefixCount( |
| const char** data, int* remaining, SBPrefix* host, int* count) { |
| // Next 4 bytes are the host prefix. |
| memcpy(host, *data, sizeof(SBPrefix)); |
| *data += sizeof(SBPrefix); |
| *remaining -= sizeof(SBPrefix); |
| |
| // Next 1 byte is the prefix count (could be zero, but never negative). |
| *count = static_cast<unsigned char>(**data); |
| *data += 1; |
| *remaining -= 1; |
| } |
| |
| int SafeBrowsingProtocolParser::ReadChunkId( |
| const char** data, int* remaining) { |
| int chunk_number; |
| memcpy(&chunk_number, *data, sizeof(chunk_number)); |
| *data += sizeof(chunk_number); |
| *remaining -= sizeof(chunk_number); |
| return htonl(chunk_number); |
| } |
| |
| bool SafeBrowsingProtocolParser::ReadPrefixes( |
| const char** data, int* remaining, SBEntry* entry, int count) { |
| int hash_len = entry->HashLen(); |
| for (int i = 0; i < count; ++i) { |
| if (entry->IsSub()) { |
| entry->SetChunkIdAtPrefix(i, ReadChunkId(data, remaining)); |
| if (*remaining <= 0) |
| return false; |
| } |
| |
| if (entry->IsPrefix()) { |
| entry->SetPrefixAt(i, *reinterpret_cast<const SBPrefix*>(*data)); |
| } else { |
| entry->SetFullHashAt(i, *reinterpret_cast<const SBFullHash*>(*data)); |
| } |
| *data += hash_len; |
| *remaining -= hash_len; |
| if (*remaining < 0) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data, |
| int chunk_length, |
| std::string* client_key, |
| std::string* wrapped_key) { |
| DCHECK(client_key && wrapped_key); |
| client_key->clear(); |
| wrapped_key->clear(); |
| |
| const char* data = chunk_data; |
| int remaining = chunk_length; |
| |
| while (remaining > 0) { |
| std::string line; |
| if (!GetLine(data, remaining, &line)) |
| return false; |
| |
| std::vector<std::string> cmd_parts; |
| base::SplitString(line, ':', &cmd_parts); |
| if (cmd_parts.size() != 3) |
| return false; |
| |
| if (static_cast<int>(cmd_parts[2].size()) != atoi(cmd_parts[1].c_str())) |
| return false; |
| |
| if (cmd_parts[0] == "clientkey") { |
| client_key->assign(cmd_parts[2]); |
| } else if (cmd_parts[0] == "wrappedkey") { |
| wrapped_key->assign(cmd_parts[2]); |
| } else { |
| return false; |
| } |
| |
| data += line.size() + 1; |
| remaining -= static_cast<int>(line.size()) + 1; |
| } |
| |
| if (client_key->empty() || wrapped_key->empty()) |
| return false; |
| |
| return true; |
| } |