net/socket/tcp_client_socket_win.cc - platform/external/chromium - Git at Google

 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "net/socket/tcp_client_socket_win.h"

 #include <mstcpip.h>

 #include "base/basictypes.h"
 #include "base/compiler_specific.h"
 #include "base/memory/memory_debug.h"
 #include "base/metrics/stats_counters.h"
 #include "base/string_util.h"
 #include "base/sys_info.h"
 #include "base/win/object_watcher.h"
 #include "net/base/address_list_net_log_param.h"
 #include "net/base/connection_type_histograms.h"
 #include "net/base/io_buffer.h"
 #include "net/base/ip_endpoint.h"
 #include "net/base/net_errors.h"
 #include "net/base/net_log.h"
 #include "net/base/net_util.h"
 #include "net/base/network_change_notifier.h"
 #include "net/base/sys_addrinfo.h"
 #include "net/base/winsock_init.h"
 #include "net/base/winsock_util.h"

 namespace net {

 namespace {

 // Translates the Winsock error from a failed connect attempt into a net error
 // code, preferring connect-specific codes over the generic system mapping.
 int MapConnectError(int os_error) {
   // connect fails with WSAEACCES when Windows Firewall blocks the
   // connection.
   if (os_error == WSAEACCES)
     return ERR_NETWORK_ACCESS_DENIED;
   if (os_error == WSAETIMEDOUT)
     return ERR_CONNECTION_TIMED_OUT;

   const int mapped = MapSystemError(os_error);

   // ERR_FAILED says nothing useful; report a connection failure instead.
   if (mapped == ERR_FAILED)
     return ERR_CONNECTION_FAILED;

   // Give a more specific error when the user is offline.
   const bool unreachable_while_offline =
       mapped == ERR_ADDRESS_UNREACHABLE && NetworkChangeNotifier::IsOffline();
   return unreachable_while_offline ? ERR_INTERNET_DISCONNECTED : mapped;
 }

 }  // namespace

 //-----------------------------------------------------------------------------

 // This class encapsulates all the state that has to be preserved as long as
 // there is a network IO operation in progress. If the owner TCPClientSocketWin
 // is destroyed while an operation is in progress, the Core is detached and it
 // lives until the operation completes and the OS doesn't reference any resource
 // declared on this class anymore.
 // Reference-counted holder of the OVERLAPPED structures, buffers and object
 // watchers used by one TCPClientSocketWin. WatchForRead()/WatchForWrite() take
 // an extra reference that the matching delegate callback releases.
 class TCPClientSocketWin::Core : public base::RefCounted<Core> {
  public:
   // |socket| is the owning socket; it is notified when a watched event fires.
   explicit Core(TCPClientSocketWin* socket);

   // Start watching for the end of a read or write operation.
   // Each call adds a reference to this object before starting the watch.
   void WatchForRead();
   void WatchForWrite();

   // The TCPClientSocketWin is going away.
   // After this call the delegates no longer forward completions to it.
   void Detach() { socket_ = NULL; }

   // The separate OVERLAPPED variables for asynchronous operation.
   // |read_overlapped_| is used for both Connect() and Read().
   // |write_overlapped_| is only used for Write();
   OVERLAPPED read_overlapped_;
   OVERLAPPED write_overlapped_;

   // The buffers used in Read() and Write().
   // The WSABUFs point into the IOBuffers below; the scoped_refptrs hold a
   // reference to those IOBuffers while an operation is pending.
   WSABUF read_buffer_;
   WSABUF write_buffer_;
   scoped_refptr<IOBuffer> read_iobuffer_;
   scoped_refptr<IOBuffer> write_iobuffer_;
   // Number of bytes requested by the most recent Write(); used to sanity
   // check the byte count reported on completion.
   int write_buffer_length_;

   // Throttle the read size based on our current slow start state.
   // Returns the throttled read size.
   // While the throttle is below kMaxSlowStartThrottle, |size| is capped to
   // the throttle and the throttle is doubled for the next call; once the
   // maximum is reached |size| is returned unchanged.
   int ThrottleReadSize(int size) {
     if (slow_start_throttle_ < kMaxSlowStartThrottle) {
       size = std::min(size, slow_start_throttle_);
       slow_start_throttle_ *= 2;
     }
     return size;
   }

  private:
   friend class base::RefCounted<Core>;

   // Receives the signal for |read_overlapped_.hEvent| and forwards it to the
   // owning socket as either a connect or a read completion.
   class ReadDelegate : public base::win::ObjectWatcher::Delegate {
    public:
     explicit ReadDelegate(Core* core) : core_(core) {}
     virtual ~ReadDelegate() {}

     // base::win::ObjectWatcher::Delegate methods:
     virtual void OnObjectSignaled(HANDLE object);

    private:
     // The Core that contains this delegate.
     Core* const core_;
   };

   // Receives the signal for |write_overlapped_.hEvent| and forwards it to the
   // owning socket as a write completion.
   class WriteDelegate : public base::win::ObjectWatcher::Delegate {
    public:
     explicit WriteDelegate(Core* core) : core_(core) {}
     virtual ~WriteDelegate() {}

     // base::win::ObjectWatcher::Delegate methods:
     virtual void OnObjectSignaled(HANDLE object);

    private:
     // The Core that contains this delegate.
     Core* const core_;
   };

   // Private: instances are destroyed through base::RefCounted (a friend).
   ~Core();

   // The socket that created this object.
   // NULL after Detach().
   TCPClientSocketWin* socket_;

   // |reader_| handles the signals from |read_watcher_|.
   ReadDelegate reader_;
   // |writer_| handles the signals from |write_watcher_|.
   WriteDelegate writer_;

   // |read_watcher_| watches for events from Connect() and Read().
   base::win::ObjectWatcher read_watcher_;
   // |write_watcher_| watches for events from Write();
   base::win::ObjectWatcher write_watcher_;

   // When doing reads from the socket, we try to mirror TCP's slow start.
   // We do this because otherwise the async IO subsystem artificially delays
   // returning data to the application.
   // The throttle starts at 1 KB and stops applying once it reaches 32 KB.
   static const int kInitialSlowStartThrottle = 1 * 1024;
   static const int kMaxSlowStartThrottle = 32 * kInitialSlowStartThrottle;
   int slow_start_throttle_;

   DISALLOW_COPY_AND_ASSIGN(Core);
 };

 // Builds the I/O state holder for |socket|. Both OVERLAPPED structures start
 // out zero-filled, so their event handles are NULL until the owner assigns
 // them.
 TCPClientSocketWin::Core::Core(TCPClientSocketWin* socket)
     : write_buffer_length_(0),
       socket_(socket),
       ALLOW_THIS_IN_INITIALIZER_LIST(reader_(this)),
       ALLOW_THIS_IN_INITIALIZER_LIST(writer_(this)),
       slow_start_throttle_(kInitialSlowStartThrottle) {
   memset(&write_overlapped_, 0, sizeof(OVERLAPPED));
   memset(&read_overlapped_, 0, sizeof(OVERLAPPED));
 }

 TCPClientSocketWin::Core::~Core() {
   // Detach both watchers from the message loop before the event handles they
   // watch are closed.
   write_watcher_.StopWatching();
   read_watcher_.StopWatching();

   // Close each event, then overwrite its OVERLAPPED with the 0xaf byte
   // pattern (presumably so a stale use after destruction stands out).
   WSACloseEvent(read_overlapped_.hEvent);
   WSACloseEvent(write_overlapped_.hEvent);
   memset(&read_overlapped_, 0xaf, sizeof(OVERLAPPED));
   memset(&write_overlapped_, 0xaf, sizeof(OVERLAPPED));
 }

 void TCPClientSocketWin::Core::WatchForRead() {
   // We grab an extra reference because there is an IO operation in progress.
   // Balanced in ReadDelegate::OnObjectSignaled().
   AddRef();
   read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
 }

 void TCPClientSocketWin::Core::WatchForWrite() {
   // We grab an extra reference because there is an IO operation in progress.
   // Balanced in WriteDelegate::OnObjectSignaled().
   AddRef();
   write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
 }

 // Called when the read event is signaled. Routes the completion to the owning
 // socket (if it has not detached) and drops the reference taken by
 // WatchForRead().
 void TCPClientSocketWin::Core::ReadDelegate::OnObjectSignaled(
     HANDLE object) {
   DCHECK_EQ(object, core_->read_overlapped_.hEvent);

   TCPClientSocketWin* const owner = core_->socket_;
   if (owner) {
     // The read event doubles as the connect event.
     if (owner->waiting_connect())
       owner->DidCompleteConnect();
     else
       owner->DidCompleteRead();
   }

   core_->Release();
 }

 // Called when the write event is signaled. Routes the completion to the
 // owning socket (if it has not detached) and drops the reference taken by
 // WatchForWrite().
 void TCPClientSocketWin::Core::WriteDelegate::OnObjectSignaled(
     HANDLE object) {
   DCHECK_EQ(object, core_->write_overlapped_.hEvent);

   TCPClientSocketWin* const owner = core_->socket_;
   if (owner) {
     owner->DidCompleteWrite();
   }

   core_->Release();
 }

 //-----------------------------------------------------------------------------

 // Creates an unconnected socket for |addresses|. Logs a SOCKET_ALIVE begin
 // event (tagged with |source| when it is valid) and initializes Winsock.
 TCPClientSocketWin::TCPClientSocketWin(const AddressList& addresses,
                                        net::NetLog* net_log,
                                        const net::NetLog::Source& source)
     : socket_(INVALID_SOCKET),
       addresses_(addresses),
       current_ai_(NULL),
       waiting_read_(false),
       waiting_write_(false),
       read_callback_(NULL),
       write_callback_(NULL),
       next_connect_state_(CONNECT_STATE_NONE),
       connect_os_error_(0),
       net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)),
       previously_disconnected_(false) {
   // Only attach a source dependency when the caller supplied a valid one;
   // otherwise the event is logged with NULL parameters.
   scoped_refptr<NetLog::EventParameters> alive_params;
   if (source.is_valid()) {
     alive_params = new NetLogSourceParameter("source_dependency", source);
   }
   net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, alive_params);

   EnsureWinsockInit();
 }

 // Tears down any live connection, then closes the SOCKET_ALIVE event opened
 // by the constructor.
 TCPClientSocketWin::~TCPClientSocketWin() {
   this->Disconnect();

   net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE, NULL);
 }

 // Takes ownership of an already-connected |socket|. This object must not
 // currently hold a socket. Applies the standard socket options and creates
 // the state needed for subsequent Read()/Write() calls.
 void TCPClientSocketWin::AdoptSocket(SOCKET socket) {
   DCHECK_EQ(socket_, INVALID_SOCKET);
   socket_ = socket;
   int error = SetupSocket();
   DCHECK_EQ(0, error);

   core_ = new Core(this);
   // Core zero-fills its OVERLAPPED structures, so the event handles must be
   // created here just as DoConnect() does. Without them Read() and Write()
   // would issue overlapped I/O against, and watch, a NULL event handle.
   // WSACreateEvent creates a manual-reset event object.
   core_->read_overlapped_.hEvent = WSACreateEvent();
   core_->write_overlapped_.hEvent = WSACreateEvent();
   // NOTE(review): DoConnect() relies on WSAEventSelect to switch the socket to
   // non-blocking mode; an adopted socket is not switched here. Confirm that
   // callers hand over a non-blocking socket.

   current_ai_ = addresses_.head();
   use_history_.set_was_ever_connected();
 }

 #ifdef ANDROID
 // TODO(kristianm): handle the case when wait_for_connect is true
 // (sync requests)
 #endif
 // Starts connecting to the addresses supplied at construction, trying them in
 // list order. Returns OK immediately if a socket already exists, a net error
 // if the attempt finished synchronously, or ERR_IO_PENDING, in which case
 // |callback| is run once the connect completes.
 int TCPClientSocketWin::Connect(CompletionCallback* callback
 #ifdef ANDROID
                                 , bool wait_for_connect
 #endif
                                ) {
   DCHECK(CalledOnValidThread());

   // Nothing to do when a socket is already open.
   if (socket_ != INVALID_SOCKET)
     return OK;

   base::StatsCounter connect_counter("tcp.connect");
   connect_counter.Increment();

   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
                       new AddressListNetLogParam(addresses_));

   // Kick off the state machine at the head of the address list; later
   // addresses are tried only if earlier ones fail.
   current_ai_ = addresses_.head();
   next_connect_state_ = CONNECT_STATE_CONNECT;

   const int rv = DoConnectLoop(OK);
   if (rv != ERR_IO_PENDING) {
     LogConnectCompletion(rv);
     return rv;
   }

   // Synchronous operation not supported.
   DCHECK(callback);
   read_callback_ = callback;
   return rv;
 }

 int TCPClientSocketWin::DoConnectLoop(int result) {
   DCHECK_NE(next_connect_state_, CONNECT_STATE_NONE);

   int rv = result;
   do {
     ConnectState state = next_connect_state_;
     next_connect_state_ = CONNECT_STATE_NONE;
     switch (state) {
       case CONNECT_STATE_CONNECT:
         DCHECK_EQ(OK, rv);
         rv = DoConnect();
         break;
       case CONNECT_STATE_CONNECT_COMPLETE:
         rv = DoConnectComplete(rv);
         break;
       default:
         LOG(DFATAL) << "bad state " << state;
         rv = ERR_UNEXPECTED;
         break;
     }
   } while (rv != ERR_IO_PENDING && next_connect_state_ != CONNECT_STATE_NONE);

   return rv;
 }

 // Starts a connection attempt to |current_ai_|: creates the socket, the Core
 // and its two events, then issues a non-blocking connect(). Returns a net
 // error if socket creation or connect() fails immediately, or ERR_IO_PENDING
 // once the read event is being watched for the FD_CONNECT notification.
 // Always queues CONNECT_STATE_CONNECT_COMPLETE as the next state.
 int TCPClientSocketWin::DoConnect() {
   const struct addrinfo* ai = current_ai_;
   DCHECK(ai);
   DCHECK_EQ(0, connect_os_error_);

   // A reconnect after a disconnect starts with a fresh usage history.
   if (previously_disconnected_) {
     use_history_.Reset();
     previously_disconnected_ = false;
   }

   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
                       new NetLogStringParameter(
                           "address", NetAddressToStringWithPort(current_ai_)));

   next_connect_state_ = CONNECT_STATE_CONNECT_COMPLETE;

   // Remember the raw OS error so DoConnectComplete() can log it.
   connect_os_error_ = CreateSocket(ai);
   if (connect_os_error_ != 0)
     return MapSystemError(connect_os_error_);

   DCHECK(!core_);
   core_ = new Core(this);

   // WSACreateEvent creates a manual-reset event object.
   core_->read_overlapped_.hEvent = WSACreateEvent();
   // WSAEventSelect sets the socket to non-blocking mode as a side effect.
   // Our connect() and recv() calls require that the socket be non-blocking.
   WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);

   core_->write_overlapped_.hEvent = WSACreateEvent();

   if (!connect(socket_, ai->ai_addr, static_cast<int>(ai->ai_addrlen))) {
     // Connected without waiting!
     //
     // The MSDN page for connect says:
     //   With a nonblocking socket, the connection attempt cannot be completed
     //   immediately. In this case, connect will return SOCKET_ERROR, and
     //   WSAGetLastError will return WSAEWOULDBLOCK.
     // which implies that for a nonblocking socket, connect never returns 0.
     // It's not documented whether the event object will be signaled or not
     // if connect does return 0.  So the code below is essentially dead code
     // and we don't know if it's correct.
     NOTREACHED();

     if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
       return OK;
   } else {
     // WSAEWOULDBLOCK is the expected "in progress" result; anything else is
     // an immediate failure.
     int os_error = WSAGetLastError();
     if (os_error != WSAEWOULDBLOCK) {
       LOG(ERROR) << "connect failed: " << os_error;
       connect_os_error_ = os_error;
       return MapConnectError(os_error);
     }
   }

   // The connect is in flight; completion arrives via the read event.
   core_->WatchForRead();
   return ERR_IO_PENDING;
 }

 int TCPClientSocketWin::DoConnectComplete(int result) {
   // Log the end of this attempt (and any OS error it threw).
   int os_error = connect_os_error_;
   connect_os_error_ = 0;
   scoped_refptr<NetLog::EventParameters> params;
   if (result != OK)
     params = new NetLogIntegerParameter("os_error", os_error);
   net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, params);

   if (result == OK) {
     use_history_.set_was_ever_connected();
     return OK;  // Done!
   }

   // Close whatever partially connected socket we currently have.
   DoDisconnect();

   // Try to fall back to the next address in the list.
   if (current_ai_->ai_next) {
     next_connect_state_ = CONNECT_STATE_CONNECT;
     current_ai_ = current_ai_->ai_next;
     return OK;
   }

   // Otherwise there is nothing to fall back to, so give up.
   return result;
 }

 void TCPClientSocketWin::Disconnect() {
   DoDisconnect();
   current_ai_ = NULL;
 }

 // Shuts down and closes the socket, clears the pending read/write flags and
 // detaches from the Core. Does nothing if no socket is open. Unlike
 // Disconnect(), leaves |current_ai_| untouched.
 void TCPClientSocketWin::DoDisconnect() {
   DCHECK(CalledOnValidThread());

   if (socket_ == INVALID_SOCKET)
     return;

   // Note: don't use CancelIo to cancel pending IO because it doesn't work
   // when there is a Winsock layered service provider.

   // In most socket implementations, closing a socket results in a graceful
   // connection shutdown, but in Winsock we have to call shutdown explicitly.
   // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
   // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
   shutdown(socket_, SD_SEND);

   // This cancels any pending IO.
   closesocket(socket_);
   socket_ = INVALID_SOCKET;

   if (waiting_connect()) {
     // We closed the socket, so this notification will never come.
     // From MSDN's WSAEventSelect documentation:
     // "Closing a socket with closesocket also cancels the association and
     // selection of network events specified in WSAEventSelect for the socket".
     // Drop the reference WatchForRead() took for the pending connect, since
     // ReadDelegate::OnObjectSignaled() will not run to release it.
     core_->Release();
   }

   waiting_read_ = false;
   waiting_write_ = false;

   // Stop the Core from calling back into this object, then give up our
   // reference to it.
   core_->Detach();
   core_ = NULL;

   // Checked by DoConnect() to reset |use_history_| on the next connect.
   previously_disconnected_ = true;
 }

 // Returns true if a socket is open, no connect is pending, and a one-byte
 // peek shows the connection has neither been closed by the peer nor failed.
 bool TCPClientSocketWin::IsConnected() const {
   DCHECK(CalledOnValidThread());

   if (socket_ == INVALID_SOCKET || waiting_connect())
     return false;

   // Probe the connection without consuming any data.
   char peeked;
   const int rv = recv(socket_, &peeked, 1, MSG_PEEK);

   // An error is acceptable only when it merely means "no data available yet".
   if (rv == SOCKET_ERROR)
     return WSAGetLastError() == WSAEWOULDBLOCK;

   // Zero means the peer closed the connection; anything else means data is
   // waiting on a live connection.
   return rv != 0;
 }

 // Returns true only if a socket is open, no connect is pending, and a
 // one-byte peek reports WSAEWOULDBLOCK, i.e. the connection is alive and no
 // unexpected data (or close) is waiting to be read.
 bool TCPClientSocketWin::IsConnectedAndIdle() const {
   DCHECK(CalledOnValidThread());

   if (socket_ == INVALID_SOCKET || waiting_connect())
     return false;

   // Probe the connection without consuming any data.
   char peeked;
   const int rv = recv(socket_, &peeked, 1, MSG_PEEK);

   // Any non-negative result means there is data or the peer closed: not idle.
   if (rv >= 0)
     return false;

   return WSAGetLastError() == WSAEWOULDBLOCK;
 }

 // Copies the address currently in use into |address|. Returns
 // ERR_SOCKET_NOT_CONNECTED when the socket is not connected, OK otherwise.
 int TCPClientSocketWin::GetPeerAddress(AddressList* address) const {
   DCHECK(CalledOnValidThread());
   DCHECK(address);

   if (IsConnected()) {
     address->Copy(current_ai_, false);
     return OK;
   }
   return ERR_SOCKET_NOT_CONNECTED;
 }

 // Stores the socket's local endpoint in |address|. Returns
 // ERR_SOCKET_NOT_CONNECTED when not connected, the mapped Winsock error if
 // getsockname fails, ERR_FAILED if the address cannot be converted, and OK
 // otherwise.
 int TCPClientSocketWin::GetLocalAddress(IPEndPoint* address) const {
   DCHECK(CalledOnValidThread());
   DCHECK(address);

   if (!IsConnected())
     return ERR_SOCKET_NOT_CONNECTED;

   struct sockaddr_storage storage;
   struct sockaddr* sa = reinterpret_cast<struct sockaddr*>(&storage);
   socklen_t sa_len = sizeof(storage);
   if (getsockname(socket_, sa, &sa_len) != 0)
     return MapSystemError(WSAGetLastError());

   if (address->FromSockAddr(sa, sa_len))
     return OK;
   return ERR_FAILED;
 }

 // Records on |use_history_| that this socket was set up for subresource
 // speculation.
 void TCPClientSocketWin::SetSubresourceSpeculation() {
   use_history_.set_subresource_speculation();
 }

 // Records on |use_history_| that this socket was set up for omnibox
 // speculation.
 void TCPClientSocketWin::SetOmniboxSpeculation() {
   use_history_.set_omnibox_speculation();
 }

 // Reports whether |use_history_| has recorded any data transfer; Read() and
 // Write() completions set that flag when a non-zero byte count is moved.
 bool TCPClientSocketWin::WasEverUsed() const {
   return use_history_.was_used_to_convey_data();
 }

 // Always returns false for this implementation.
 bool TCPClientSocketWin::UsingTCPFastOpen() const {
   // Not supported on windows.
   return false;
 }

 // Starts an overlapped receive into |buf|. At most |buf_len| bytes are
 // requested, possibly fewer because of the slow-start throttle. Returns the
 // number of bytes read if the operation completed synchronously, a net error
 // if WSARecv failed outright, or ERR_IO_PENDING, in which case |callback| is
 // run from DidCompleteRead(). Only one read may be outstanding at a time.
 int TCPClientSocketWin::Read(IOBuffer* buf,
                              int buf_len,
                              CompletionCallback* callback) {
   DCHECK(CalledOnValidThread());
   DCHECK_NE(socket_, INVALID_SOCKET);
   DCHECK(!waiting_read_);
   DCHECK(!read_callback_);
   DCHECK(!core_->read_iobuffer_);

   buf_len = core_->ThrottleReadSize(buf_len);

   core_->read_buffer_.len = buf_len;
   core_->read_buffer_.buf = buf->data();

   // TODO(wtc): Remove the assertion after enough testing.
   AssertEventNotSignaled(core_->read_overlapped_.hEvent);
   DWORD num, flags = 0;
   int rv = WSARecv(socket_, &core_->read_buffer_, 1, &num, &flags,
                    &core_->read_overlapped_, NULL);
   if (rv == 0) {
     // Completed immediately. If the event was not signaled, fall through and
     // wait for it like an ordinary pending read.
     if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) {
       // Because of how WSARecv fills memory when used asynchronously, Purify
       // isn't able to detect that it's been initialized, so it scans for 0xcd
       // in the buffer and reports UMRs (uninitialized memory reads) for those
       // individual bytes. We override that in PURIFY builds to avoid the
       // false error reports.
       // See bug 5297.
       base::MemoryDebug::MarkAsInitialized(core_->read_buffer_.buf, num);
       base::StatsCounter read_bytes("tcp.read_bytes");
       read_bytes.Add(num);
       if (num > 0)
         use_history_.set_was_used_to_convey_data();
       LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num,
                       core_->read_buffer_.buf);
       return static_cast<int>(num);
     }
   } else {
     // WSA_IO_PENDING means the read was queued; anything else is a failure.
     int os_error = WSAGetLastError();
     if (os_error != WSA_IO_PENDING)
       return MapSystemError(os_error);
   }
   // The read is in flight: watch for completion and hold a reference to |buf|
   // until DidCompleteRead() runs.
   core_->WatchForRead();
   waiting_read_ = true;
   read_callback_ = callback;
   core_->read_iobuffer_ = buf;
   return ERR_IO_PENDING;
 }

 // Starts an overlapped send of |buf_len| bytes (must be > 0) from |buf|.
 // Returns the number of bytes written if the operation completed
 // synchronously, a net error if WSASend failed outright or reported an
 // impossible byte count, or ERR_IO_PENDING, in which case |callback| is run
 // from DidCompleteWrite(). Only one write may be outstanding at a time.
 int TCPClientSocketWin::Write(IOBuffer* buf,
                               int buf_len,
                               CompletionCallback* callback) {
   DCHECK(CalledOnValidThread());
   DCHECK_NE(socket_, INVALID_SOCKET);
   DCHECK(!waiting_write_);
   DCHECK(!write_callback_);
   DCHECK_GT(buf_len, 0);
   DCHECK(!core_->write_iobuffer_);

   base::StatsCounter writes("tcp.writes");
   writes.Increment();

   core_->write_buffer_.len = buf_len;
   core_->write_buffer_.buf = buf->data();
   // Remembered so DidCompleteWrite() can validate the reported byte count.
   core_->write_buffer_length_ = buf_len;

   // TODO(wtc): Remove the assertion after enough testing.
   AssertEventNotSignaled(core_->write_overlapped_.hEvent);
   DWORD num;
   int rv = WSASend(socket_, &core_->write_buffer_, 1, &num, 0,
                    &core_->write_overlapped_, NULL);
   if (rv == 0) {
     // Completed immediately. If the event was not signaled, fall through and
     // wait for it like an ordinary pending write.
     if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
       rv = static_cast<int>(num);
       if (rv > buf_len || rv < 0) {
         // It seems that some winsock interceptors report that more was written
         // than was available. Treat this as an error.  http://crbug.com/27870
         LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
                    << " bytes, but " << rv << " bytes reported.";
         return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
       }
       base::StatsCounter write_bytes("tcp.write_bytes");
       write_bytes.Add(rv);
       if (rv > 0)
         use_history_.set_was_used_to_convey_data();
       LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, rv,
                       core_->write_buffer_.buf);
       return rv;
     }
   } else {
     // WSA_IO_PENDING means the write was queued; anything else is a failure.
     int os_error = WSAGetLastError();
     if (os_error != WSA_IO_PENDING)
       return MapSystemError(os_error);
   }
   // The write is in flight: watch for completion and hold a reference to
   // |buf| until DidCompleteWrite() runs.
   core_->WatchForWrite();
   waiting_write_ = true;
   write_callback_ = callback;
   core_->write_iobuffer_ = buf;
   return ERR_IO_PENDING;
 }

 // Sets the socket's SO_RCVBUF option to |size| bytes. Returns true on
 // success; a failure also trips a DCHECK in debug builds.
 bool TCPClientSocketWin::SetReceiveBufferSize(int32 size) {
   DCHECK(CalledOnValidThread());
   int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF,
                       reinterpret_cast<const char*>(&size), sizeof(size));
   // setsockopt is a Winsock call, so its error is reported through
   // WSAGetLastError(), matching the rest of this file.
   DCHECK(!rv) << "Could not set socket receive buffer size: "
               << WSAGetLastError();
   return rv == 0;
 }

 // Sets the socket's SO_SNDBUF option to |size| bytes. Returns true on
 // success; a failure also trips a DCHECK in debug builds.
 bool TCPClientSocketWin::SetSendBufferSize(int32 size) {
   DCHECK(CalledOnValidThread());
   int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF,
                       reinterpret_cast<const char*>(&size), sizeof(size));
   // setsockopt is a Winsock call, so its error is reported through
   // WSAGetLastError(), matching the rest of this file.
   DCHECK(!rv) << "Could not set socket send buffer size: "
               << WSAGetLastError();
   return rv == 0;
 }

 // Creates an overlapped-capable socket matching |ai| and applies the standard
 // options to it. Returns 0 on success or the Winsock error code if the socket
 // could not be created.
 int TCPClientSocketWin::CreateSocket(const struct addrinfo* ai) {
   socket_ = WSASocket(ai->ai_family, ai->ai_socktype, ai->ai_protocol, NULL, 0,
                       WSA_FLAG_OVERLAPPED);
   if (socket_ != INVALID_SOCKET)
     return SetupSocket();

   const int os_error = WSAGetLastError();
   LOG(ERROR) << "WSASocket failed: " << os_error;
   return os_error;
 }

 // Applies the standard options to |socket_|: larger buffers on pre-Vista
 // systems, Nagle disabled, and TCP keep-alive enabled. Failures trip DCHECKs
 // in debug builds but are otherwise ignored; always returns 0.
 int TCPClientSocketWin::SetupSocket() {
   // Increase the socket buffer sizes from the default sizes for WinXP.  In
   // performance testing, there is substantial benefit by increasing from 8KB
   // to 64KB.
   // See also:
   //    http://support.microsoft.com/kb/823764/EN-US
   // On Vista, if we manually set these sizes, Vista turns off its receive
   // window auto-tuning feature.
   //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
   // Since Vista's auto-tune is better than any static value we could set,
   // only change these on pre-vista machines.
   int32 major_version, minor_version, fix_version;
   base::SysInfo::OperatingSystemVersionNumbers(&major_version, &minor_version,
     &fix_version);
   // Major version 6 is Vista; anything lower is treated as pre-Vista.
   if (major_version < 6) {
     const int32 kSocketBufferSize = 64 * 1024;
     SetReceiveBufferSize(kSocketBufferSize);
     SetSendBufferSize(kSocketBufferSize);
   }

   // Disable Nagle.
   // The Nagle implementation on windows is governed by RFC 896.  The idea
   // behind Nagle is to reduce small packets on the network.  When Nagle is
   // enabled, if a partial packet has been sent, the TCP stack will disallow
   // further *partial* packets until an ACK has been received from the other
   // side.  Good applications should always strive to send as much data as
   // possible and avoid partial-packet sends.  However, in most real world
   // applications, there are edge cases where this does not happen, and two
   // partial packets may be sent back to back.  For a browser, it is NEVER
   // a benefit to delay for an RTT before the second packet is sent.
   //
   // As a practical example in Chromium today, consider the case of a small
   // POST.  I have verified this:
   //     Client writes 649 bytes of header  (partial packet #1)
   //     Client writes 50 bytes of POST data (partial packet #2)
   // In the above example, with Nagle, a RTT delay is inserted between these
   // two sends due to nagle.  RTTs can easily be 100ms or more.  The best
   // fix is to make sure that for POSTing data, we write as much data as
   // possible and minimize partial packets.  We will fix that.  But disabling
   // Nagle also ensures we don't run into this delay in other edge cases.
   // See also:
   //    http://technet.microsoft.com/en-us/library/bb726981.aspx
   const BOOL kDisableNagle = TRUE;
   int rv = setsockopt(socket_, IPPROTO_TCP, TCP_NODELAY,
                       reinterpret_cast<const char*>(&kDisableNagle),
                       sizeof(kDisableNagle));
   DCHECK(!rv) << "Could not disable nagle";

   // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
   // connections. See http://crbug.com/27400 for details.

   struct tcp_keepalive keepalive_vals = {
     1, // TCP keep-alive on.
     45000,  // Wait 45s until sending first TCP keep-alive packet.
     45000,  // Wait 45s between sending TCP keep-alive packets.
   };
   // Seeded with a recognizable value; WSAIoctl writes the real count here.
   DWORD bytes_returned = 0xABAB;
   rv = WSAIoctl(socket_, SIO_KEEPALIVE_VALS, &keepalive_vals,
                 sizeof(keepalive_vals), NULL, 0,
                 &bytes_returned, NULL, NULL);
   DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket_
               << " [error: " << WSAGetLastError() << "].";

   // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
   return 0;
 }

 void TCPClientSocketWin::LogConnectCompletion(int net_error) {
   if (net_error == OK)
     UpdateConnectionTypeHistograms(CONNECTION_ANY);

   if (net_error != OK) {
     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
     return;
   }

   struct sockaddr_storage source_address;
   socklen_t addrlen = sizeof(source_address);
   int rv = getsockname(
       socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
   if (rv != 0) {
     LOG(ERROR) << "getsockname() [rv: " << rv
                << "] error: " << WSAGetLastError();
     NOTREACHED();
     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
     return;
   }

   const std::string source_address_str =
       NetAddressToStringWithPort(
           reinterpret_cast<const struct sockaddr*>(&source_address),
           sizeof(source_address));
   net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT,
                     make_scoped_refptr(new NetLogStringParameter(
                         "source address",
                         source_address_str)));
 }

 void TCPClientSocketWin::DoReadCallback(int rv) {
   DCHECK_NE(rv, ERR_IO_PENDING);
   DCHECK(read_callback_);

   // since Run may result in Read being called, clear read_callback_ up front.
   CompletionCallback* c = read_callback_;
   read_callback_ = NULL;
   c->Run(rv);
 }

 void TCPClientSocketWin::DoWriteCallback(int rv) {
   DCHECK_NE(rv, ERR_IO_PENDING);
   DCHECK(write_callback_);

   // since Run may result in Write being called, clear write_callback_ up front.
   CompletionCallback* c = write_callback_;
   write_callback_ = NULL;
   c->Run(rv);
 }

 // Handles the signal for a pending connect: reads the FD_CONNECT outcome from
 // Winsock, resumes the connect state machine with it, and, unless another
 // attempt is now pending, logs the completion and runs the connect callback.
 void TCPClientSocketWin::DidCompleteConnect() {
   DCHECK_EQ(next_connect_state_, CONNECT_STATE_CONNECT_COMPLETE);

   // Defaults cover the "no FD_CONNECT event reported" case below.
   int os_error = 0;
   int result = ERR_UNEXPECTED;

   WSANETWORKEVENTS events;
   const int enum_rv = WSAEnumNetworkEvents(
       socket_, core_->read_overlapped_.hEvent, &events);
   if (enum_rv == SOCKET_ERROR) {
     NOTREACHED();
     os_error = WSAGetLastError();
     result = MapSystemError(os_error);
   } else if (events.lNetworkEvents & FD_CONNECT) {
     os_error = events.iErrorCode[FD_CONNECT_BIT];
     result = MapConnectError(os_error);
   } else {
     NOTREACHED();
   }

   // Stash the OS error for DoConnectComplete() to log.
   connect_os_error_ = os_error;

   const int rv = DoConnectLoop(result);
   if (rv == ERR_IO_PENDING)
     return;

   LogConnectCompletion(rv);
   DoReadCallback(rv);
 }

 // Handles completion of a pending overlapped read: collects the result,
 // resets the read event, updates stats and the net log on success, then runs
 // the read callback with the byte count or the mapped error.
 void TCPClientSocketWin::DidCompleteRead() {
   DCHECK(waiting_read_);
   DWORD num_bytes, flags;
   BOOL ok = WSAGetOverlappedResult(socket_, &core_->read_overlapped_,
                                    &num_bytes, FALSE, &flags);
   // Capture the failure reason immediately, before any later call has a
   // chance to overwrite the thread's last-error value.
   const int os_error = ok ? 0 : WSAGetLastError();
   WSAResetEvent(core_->read_overlapped_.hEvent);
   waiting_read_ = false;

   int rv;
   if (ok) {
     base::StatsCounter read_bytes("tcp.read_bytes");
     read_bytes.Add(num_bytes);
     if (num_bytes > 0)
       use_history_.set_was_used_to_convey_data();
     LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num_bytes,
                     core_->read_buffer_.buf);
     rv = static_cast<int>(num_bytes);
   } else {
     rv = MapSystemError(os_error);
   }

   // Release our reference to the caller's buffer only after the logging above
   // has finished reading from it, mirroring DidCompleteWrite(). Dropping it
   // earlier could free the buffer while |read_buffer_.buf| is still in use.
   core_->read_iobuffer_ = NULL;
   DoReadCallback(rv);
 }

 void TCPClientSocketWin::DidCompleteWrite() {
   DCHECK(waiting_write_);

   DWORD num_bytes, flags;
   BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
                                    &num_bytes, FALSE, &flags);
   WSAResetEvent(core_->write_overlapped_.hEvent);
   waiting_write_ = false;
   int rv;
   if (!ok) {
     rv = MapSystemError(WSAGetLastError());
   } else {
     rv = static_cast<int>(num_bytes);
     if (rv > core_->write_buffer_length_ || rv < 0) {
       // It seems that some winsock interceptors report that more was written
       // than was available. Treat this as an error.  http://crbug.com/27870
       LOG(ERROR) << "Detected broken LSP: Asked to write "
                  << core_->write_buffer_length_ << " bytes, but " << rv
                  << " bytes reported.";
       rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
     } else {
       base::StatsCounter write_bytes("tcp.write_bytes");
       write_bytes.Add(num_bytes);
       if (num_bytes > 0)
         use_history_.set_was_used_to_convey_data();
       LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
                       core_->write_buffer_.buf);
     }
   }
   core_->write_iobuffer_ = NULL;
   DoWriteCallback(rv);
 }

 }  // namespace net