| // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "webkit/glue/multipart_response_delegate.h" |
| |
| #include "base/logging.h" |
| #include "base/string_number_conversions.h" |
| #include "base/string_util.h" |
| #include "net/base/net_util.h" |
| #include "net/http/http_util.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebHTTPHeaderVisitor.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" |
| #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLLoaderClient.h" |
| |
| using WebKit::WebHTTPHeaderVisitor; |
| using WebKit::WebString; |
| using WebKit::WebURLLoader; |
| using WebKit::WebURLLoaderClient; |
| using WebKit::WebURLResponse; |
| |
| namespace webkit_glue { |
| |
| namespace { |
| |
// Names (lower-case ASCII) of the response headers that are NOT copied from
// the original response when generating a WebURLResponse for a MIME payload.
// Both the characters and the pointers are const so the table cannot be
// modified at runtime.
const char* const kReplaceHeaders[] = {
  "content-type",
  "content-length",
  "content-disposition",
  "content-range",
  "range",
  "set-cookie"
};
| |
| class HeaderCopier : public WebHTTPHeaderVisitor { |
| public: |
| HeaderCopier(WebURLResponse* response) |
| : response_(response) { |
| } |
| virtual void visitHeader(const WebString& name, const WebString& value) { |
| const std::string& name_utf8 = name.utf8(); |
| for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) { |
| if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i])) |
| return; |
| } |
| response_->setHTTPHeaderField(name, value); |
| } |
| private: |
| WebURLResponse* response_; |
| }; |
| |
| } // namespace |
| |
| MultipartResponseDelegate::MultipartResponseDelegate( |
| WebURLLoaderClient* client, |
| WebURLLoader* loader, |
| const WebURLResponse& response, |
| const std::string& boundary) |
| : client_(client), |
| loader_(loader), |
| original_response_(response), |
| encoded_data_length_(0), |
| boundary_("--"), |
| first_received_data_(true), |
| processing_headers_(false), |
| stop_sending_(false), |
| has_sent_first_response_(false) { |
| // Some servers report a boundary prefixed with "--". See bug 5786. |
| if (StartsWithASCII(boundary, "--", true)) { |
| boundary_.assign(boundary); |
| } else { |
| boundary_.append(boundary); |
| } |
| } |
| |
| void MultipartResponseDelegate::OnReceivedData(const char* data, |
| int data_len, |
| int encoded_data_length) { |
| // stop_sending_ means that we've already received the final boundary token. |
| // The server should stop sending us data at this point, but if it does, we |
| // just throw it away. |
| if (stop_sending_) |
| return; |
| |
| data_.append(data, data_len); |
| encoded_data_length_ += encoded_data_length; |
| if (first_received_data_) { |
| // Some servers don't send a boundary token before the first chunk of |
| // data. We handle this case anyway (Gecko does too). |
| first_received_data_ = false; |
| |
| // Eat leading \r\n |
| int pos = PushOverLine(data_, 0); |
| if (pos) |
| data_ = data_.substr(pos); |
| |
| if (data_.length() < boundary_.length() + 2) { |
| // We don't have enough data yet to make a boundary token. Just wait |
| // until the next chunk of data arrives. |
| first_received_data_ = true; |
| return; |
| } |
| |
| if (0 != data_.compare(0, boundary_.length(), boundary_)) { |
| data_ = boundary_ + "\n" + data_; |
| } |
| } |
| DCHECK(!first_received_data_); |
| |
| // Headers |
| if (processing_headers_) { |
| // Eat leading \r\n |
| int pos = PushOverLine(data_, 0); |
| if (pos) |
| data_ = data_.substr(pos); |
| |
| if (ParseHeaders()) { |
| // Successfully parsed headers. |
| processing_headers_ = false; |
| } else { |
| // Get more data before trying again. |
| return; |
| } |
| } |
| DCHECK(!processing_headers_); |
| |
| size_t boundary_pos; |
| while ((boundary_pos = FindBoundary()) != std::string::npos) { |
| if (client_) { |
| // Strip out trailing \n\r characters in the buffer preceding the |
| // boundary on the same lines as Firefox. |
| size_t data_length = boundary_pos; |
| if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') { |
| data_length--; |
| if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') { |
| data_length--; |
| } |
| } |
| if (data_length > 0) { |
| // Send the last data chunk. |
| client_->didReceiveData(loader_, |
| data_.data(), |
| static_cast<int>(data_length), |
| encoded_data_length_); |
| encoded_data_length_ = 0; |
| } |
| } |
| size_t boundary_end_pos = boundary_pos + boundary_.length(); |
| if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) { |
| // This was the last boundary so we can stop processing. |
| stop_sending_ = true; |
| data_.clear(); |
| return; |
| } |
| |
| // We can now throw out data up through the boundary |
| int offset = PushOverLine(data_, boundary_end_pos); |
| data_ = data_.substr(boundary_end_pos + offset); |
| |
| // Ok, back to parsing headers |
| if (!ParseHeaders()) { |
| processing_headers_ = true; |
| break; |
| } |
| } |
| |
| // At this point, we should send over any data we have, but keep enough data |
| // buffered to handle a boundary that may have been truncated. |
| if (!processing_headers_ && data_.length() > boundary_.length()) { |
| // If the last character is a new line character, go ahead and just send |
| // everything we have buffered. This matches an optimization in Gecko. |
| int send_length = data_.length() - boundary_.length(); |
| if (data_[data_.length() - 1] == '\n') |
| send_length = data_.length(); |
| if (client_) |
| client_->didReceiveData(loader_, |
| data_.data(), |
| send_length, |
| encoded_data_length_); |
| data_ = data_.substr(send_length); |
| encoded_data_length_ = 0; |
| } |
| } |
| |
| void MultipartResponseDelegate::OnCompletedRequest() { |
| // If we have any pending data and we're not in a header, go ahead and send |
| // it to WebCore. |
| if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) { |
| client_->didReceiveData(loader_, |
| data_.data(), |
| static_cast<int>(data_.length()), |
| encoded_data_length_); |
| encoded_data_length_ = 0; |
| } |
| } |
| |
| int MultipartResponseDelegate::PushOverLine(const std::string& data, |
| size_t pos) { |
| int offset = 0; |
| if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) { |
| ++offset; |
| if (pos + 1 < data.length() && data[pos + 1] == '\n') |
| ++offset; |
| } |
| return offset; |
| } |
| |
| bool MultipartResponseDelegate::ParseHeaders() { |
| int line_feed_increment = 1; |
| |
| // Grab the headers being liberal about line endings. |
| size_t line_start_pos = 0; |
| size_t line_end_pos = data_.find('\n'); |
| while (line_end_pos != std::string::npos) { |
| // Handle CRLF |
| if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') { |
| line_feed_increment = 2; |
| --line_end_pos; |
| } else { |
| line_feed_increment = 1; |
| } |
| if (line_start_pos == line_end_pos) { |
| // A blank line, end of headers |
| line_end_pos += line_feed_increment; |
| break; |
| } |
| // Find the next header line. |
| line_start_pos = line_end_pos + line_feed_increment; |
| line_end_pos = data_.find('\n', line_start_pos); |
| } |
| // Truncated in the middle of a header, stop parsing. |
| if (line_end_pos == std::string::npos) |
| return false; |
| |
| // Eat headers |
| std::string headers("\n"); |
| headers.append(data_, 0, line_end_pos); |
| data_ = data_.substr(line_end_pos); |
| |
| // Create a WebURLResponse based on the original set of headers + the |
| // replacement headers. We only replace the same few headers that gecko |
| // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp. |
| std::string content_type = net::GetSpecificHeader(headers, "content-type"); |
| std::string mime_type; |
| std::string charset; |
| bool has_charset = false; |
| net::HttpUtil::ParseContentType(content_type, &mime_type, &charset, |
| &has_charset); |
| WebURLResponse response(original_response_.url()); |
| response.setMIMEType(WebString::fromUTF8(mime_type)); |
| response.setTextEncodingName(WebString::fromUTF8(charset)); |
| |
| HeaderCopier copier(&response); |
| original_response_.visitHTTPHeaderFields(&copier); |
| |
| for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) { |
| std::string name(kReplaceHeaders[i]); |
| std::string value = net::GetSpecificHeader(headers, name); |
| if (!value.empty()) { |
| response.setHTTPHeaderField(WebString::fromUTF8(name), |
| WebString::fromUTF8(value)); |
| } |
| } |
| // To avoid recording every multipart load as a separate visit in |
| // the history database, we want to keep track of whether the response |
| // is part of a multipart payload. We do want to record the first visit, |
| // so we only set isMultipartPayload to true after the first visit. |
| response.setIsMultipartPayload(has_sent_first_response_); |
| has_sent_first_response_ = true; |
| // Send the response! |
| if (client_) |
| client_->didReceiveResponse(loader_, response); |
| |
| return true; |
| } |
| |
| // Boundaries are supposed to be preceeded with --, but it looks like gecko |
| // doesn't require the dashes to exist. See nsMultiMixedConv::FindToken. |
| size_t MultipartResponseDelegate::FindBoundary() { |
| size_t boundary_pos = data_.find(boundary_); |
| if (boundary_pos != std::string::npos) { |
| // Back up over -- for backwards compat |
| // TODO(tc): Don't we only want to do this once? Gecko code doesn't seem |
| // to care. |
| if (boundary_pos >= 2) { |
| if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) { |
| boundary_pos -= 2; |
| boundary_ = "--" + boundary_; |
| } |
| } |
| } |
| return boundary_pos; |
| } |
| |
| bool MultipartResponseDelegate::ReadMultipartBoundary( |
| const WebURLResponse& response, |
| std::string* multipart_boundary) { |
| std::string content_type = |
| response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8(); |
| |
| size_t boundary_start_offset = content_type.find("boundary="); |
| if (boundary_start_offset == std::string::npos) |
| return false; |
| |
| boundary_start_offset += strlen("boundary="); |
| |
| size_t boundary_end_offset = content_type.find(';', boundary_start_offset); |
| |
| if (boundary_end_offset == std::string::npos) |
| boundary_end_offset = content_type.length(); |
| |
| size_t boundary_length = boundary_end_offset - boundary_start_offset; |
| |
| *multipart_boundary = |
| content_type.substr(boundary_start_offset, boundary_length); |
| // The byte range response can have quoted boundary strings. This is legal |
| // as per MIME specifications. Individual data fragements however don't |
| // contain quoted boundary strings. |
| TrimString(*multipart_boundary, "\"", multipart_boundary); |
| return true; |
| } |
| |
| bool MultipartResponseDelegate::ReadContentRanges( |
| const WebURLResponse& response, |
| int* content_range_lower_bound, |
| int* content_range_upper_bound, |
| int* content_range_instance_size) { |
| |
| std::string content_range = response.httpHeaderField("Content-Range").utf8(); |
| if (content_range.empty()) { |
| content_range = response.httpHeaderField("Range").utf8(); |
| } |
| |
| if (content_range.empty()) { |
| DLOG(WARNING) << "Failed to read content range from response."; |
| return false; |
| } |
| |
| size_t byte_range_lower_bound_start_offset = content_range.find(" "); |
| if (byte_range_lower_bound_start_offset == std::string::npos) { |
| return false; |
| } |
| |
| // Skip over the initial space. |
| byte_range_lower_bound_start_offset++; |
| |
| // Find the lower bound. |
| size_t byte_range_lower_bound_end_offset = |
| content_range.find("-", byte_range_lower_bound_start_offset); |
| if (byte_range_lower_bound_end_offset == std::string::npos) { |
| return false; |
| } |
| |
| size_t byte_range_lower_bound_characters = |
| byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset; |
| std::string byte_range_lower_bound = |
| content_range.substr(byte_range_lower_bound_start_offset, |
| byte_range_lower_bound_characters); |
| |
| // Find the upper bound. |
| size_t byte_range_upper_bound_start_offset = |
| byte_range_lower_bound_end_offset + 1; |
| |
| size_t byte_range_upper_bound_end_offset = |
| content_range.find("/", byte_range_upper_bound_start_offset); |
| if (byte_range_upper_bound_end_offset == std::string::npos) { |
| return false; |
| } |
| |
| size_t byte_range_upper_bound_characters = |
| byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset; |
| std::string byte_range_upper_bound = |
| content_range.substr(byte_range_upper_bound_start_offset, |
| byte_range_upper_bound_characters); |
| |
| // Find the instance size. |
| size_t byte_range_instance_size_start_offset = |
| byte_range_upper_bound_end_offset + 1; |
| |
| size_t byte_range_instance_size_end_offset = |
| content_range.length(); |
| |
| size_t byte_range_instance_size_characters = |
| byte_range_instance_size_end_offset - |
| byte_range_instance_size_start_offset; |
| std::string byte_range_instance_size = |
| content_range.substr(byte_range_instance_size_start_offset, |
| byte_range_instance_size_characters); |
| |
| if (!base::StringToInt(byte_range_lower_bound, content_range_lower_bound)) |
| return false; |
| if (!base::StringToInt(byte_range_upper_bound, content_range_upper_bound)) |
| return false; |
| if (!base::StringToInt(byte_range_instance_size, content_range_instance_size)) |
| return false; |
| return true; |
| } |
| |
| } // namespace webkit_glue |