| // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/autofill/address_field.h" |
| |
| #include <stddef.h> |
| |
| #include "base/logging.h" |
| #include "base/memory/scoped_ptr.h" |
| #include "base/string16.h" |
| #include "base/string_util.h" |
| #include "base/utf_string_conversions.h" |
| #include "chrome/browser/autofill/autofill_field.h" |
| #include "grit/autofill_resources.h" |
| #include "ui/base/l10n/l10n_util.h" |
| |
| bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const { |
| AutofillFieldType address_company; |
| AutofillFieldType address_line1; |
| AutofillFieldType address_line2; |
| AutofillFieldType address_city; |
| AutofillFieldType address_state; |
| AutofillFieldType address_zip; |
| AutofillFieldType address_country; |
| |
| switch (type_) { |
| case kShippingAddress: |
| // Fall through. Autofill does not support shipping addresses. |
| case kGenericAddress: |
| address_company = COMPANY_NAME; |
| address_line1 = ADDRESS_HOME_LINE1; |
| address_line2 = ADDRESS_HOME_LINE2; |
| address_city = ADDRESS_HOME_CITY; |
| address_state = ADDRESS_HOME_STATE; |
| address_zip = ADDRESS_HOME_ZIP; |
| address_country = ADDRESS_HOME_COUNTRY; |
| break; |
| |
| case kBillingAddress: |
| address_company = COMPANY_NAME; |
| address_line1 = ADDRESS_BILLING_LINE1; |
| address_line2 = ADDRESS_BILLING_LINE2; |
| address_city = ADDRESS_BILLING_CITY; |
| address_state = ADDRESS_BILLING_STATE; |
| address_zip = ADDRESS_BILLING_ZIP; |
| address_country = ADDRESS_BILLING_COUNTRY; |
| break; |
| |
| default: |
| NOTREACHED(); |
| return false; |
| } |
| |
| bool ok; |
| ok = Add(field_type_map, company_, AutofillType(address_company)); |
| DCHECK(ok); |
| ok = ok && Add(field_type_map, address1_, AutofillType(address_line1)); |
| DCHECK(ok); |
| ok = ok && Add(field_type_map, address2_, AutofillType(address_line2)); |
| DCHECK(ok); |
| ok = ok && Add(field_type_map, city_, AutofillType(address_city)); |
| DCHECK(ok); |
| ok = ok && Add(field_type_map, state_, AutofillType(address_state)); |
| DCHECK(ok); |
| ok = ok && Add(field_type_map, zip_, AutofillType(address_zip)); |
| DCHECK(ok); |
| ok = ok && Add(field_type_map, country_, AutofillType(address_country)); |
| DCHECK(ok); |
| |
| return ok; |
| } |
| |
| FormFieldType AddressField::GetFormFieldType() const { |
| return kAddressType; |
| } |
| |
| AddressField* AddressField::Parse( |
| std::vector<AutofillField*>::const_iterator* iter, |
| bool is_ecml) { |
| DCHECK(iter); |
| if (!iter) |
| return NULL; |
| |
| scoped_ptr<AddressField> address_field(new AddressField); |
| std::vector<AutofillField*>::const_iterator q = *iter; |
| string16 pattern; |
| |
| // The ECML standard uses 2 letter country codes. So we will |
| // have to remember that this is an ECML form, for when we fill |
| // it out. |
| address_field->is_ecml_ = is_ecml; |
| |
| string16 attention_ignored = |
| l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE); |
| string16 region_ignored = |
| l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE); |
| |
| // Allow address fields to appear in any order. |
| while (true) { |
| if (ParseCompany(&q, is_ecml, address_field.get()) || |
| ParseAddressLines(&q, is_ecml, address_field.get()) || |
| ParseCity(&q, is_ecml, address_field.get()) || |
| ParseState(&q, is_ecml, address_field.get()) || |
| ParseZipCode(&q, is_ecml, address_field.get()) || |
| ParseCountry(&q, is_ecml, address_field.get())) { |
| continue; |
| } else if (ParseText(&q, attention_ignored) || |
| ParseText(&q, region_ignored)) { |
| // We ignore the following: |
| // * Attention. |
| // * Province/Region/Other. |
| continue; |
| } else if (*q != **iter && ParseEmpty(&q)) { |
| // Ignore non-labeled fields within an address; the page |
| // MapQuest Driving Directions North America.html contains such a field. |
| // We only ignore such fields after we've parsed at least one other field; |
| // otherwise we'd effectively parse address fields before other field |
| // types after any non-labeled fields, and we want email address fields to |
| // have precedence since some pages contain fields labeled |
| // "Email address". |
| continue; |
| } else { |
| // No field found. |
| break; |
| } |
| } |
| |
| // If we have identified any address fields in this field then it should be |
| // added to the list of fields. |
| if (address_field->company_ != NULL || |
| address_field->address1_ != NULL || address_field->address2_ != NULL || |
| address_field->city_ != NULL || address_field->state_ != NULL || |
| address_field->zip_ != NULL || address_field->zip4_ || |
| address_field->country_ != NULL) { |
| *iter = q; |
| return address_field.release(); |
| } |
| |
| return NULL; |
| } |
| |
| AddressType AddressField::FindType() const { |
| // This is not a full address, so don't even bother trying to figure |
| // out its type. |
| if (address1_ == NULL) |
| return kGenericAddress; |
| |
| // First look at the field name, which itself will sometimes contain |
| // "bill" or "ship". We could check for the ECML type prefixes |
| // here, but there's no need to since ECML's prefixes Ecom_BillTo |
| // and Ecom_ShipTo contain "bill" and "ship" anyway. |
| string16 name = StringToLowerASCII(address1_->name); |
| return AddressTypeFromText(name); |
| } |
| |
| bool AddressField::IsFullAddress() { |
| return address1_ != NULL; |
| } |
| |
| AddressField::AddressField() |
| : company_(NULL), |
| address1_(NULL), |
| address2_(NULL), |
| city_(NULL), |
| state_(NULL), |
| zip_(NULL), |
| zip4_(NULL), |
| country_(NULL), |
| type_(kGenericAddress), |
| is_ecml_(false) { |
| } |
| |
| // static |
| bool AddressField::ParseCompany( |
| std::vector<AutofillField*>::const_iterator* iter, |
| bool is_ecml, AddressField* address_field) { |
| if (address_field->company_ && !address_field->company_->IsEmpty()) |
| return false; |
| |
| string16 pattern; |
| if (is_ecml) |
| pattern = GetEcmlPattern(kEcmlShipToCompanyName, |
| kEcmlBillToCompanyName, '|'); |
| else |
| pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE); |
| |
| if (!ParseText(iter, pattern, &address_field->company_)) |
| return false; |
| |
| return true; |
| } |
| |
| // static |
| bool AddressField::ParseAddressLines( |
| std::vector<AutofillField*>::const_iterator* iter, |
| bool is_ecml, AddressField* address_field) { |
| // We only match the string "address" in page text, not in element names, |
| // because sometimes every element in a group of address fields will have |
| // a name containing the string "address"; for example, on the page |
| // Kohl's - Register Billing Address.html the text element labeled "city" |
| // has the name "BILL_TO_ADDRESS<>city". We do match address labels |
| // such as "address1", which appear as element names on various pages (eg |
| // AmericanGirl-Registration.html, BloomingdalesBilling.html, |
| // EBay Registration Enter Information.html). |
| if (address_field->address1_) |
| return false; |
| |
| string16 pattern; |
| if (is_ecml) { |
| pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|'); |
| if (!ParseText(iter, pattern, &address_field->address1_)) |
| return false; |
| } else { |
| pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE); |
| string16 label_pattern = |
| l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
| |
| if (!ParseText(iter, pattern, &address_field->address1_)) |
| if (!ParseLabelText(iter, label_pattern, &address_field->address1_)) |
| return false; |
| } |
| |
| // Optionally parse more address lines, which may have empty labels. |
| // Some pages have 3 address lines (eg SharperImageModifyAccount.html) |
| // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! |
| if (is_ecml) { |
| pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|'); |
| if (!ParseEmptyText(iter, &address_field->address2_)) |
| ParseText(iter, pattern, &address_field->address2_); |
| } else { |
| pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE); |
| string16 label_pattern = |
| l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE); |
| if (!ParseEmptyText(iter, &address_field->address2_)) |
| if (!ParseText(iter, pattern, &address_field->address2_)) |
| ParseLabelText(iter, label_pattern, &address_field->address2_); |
| } |
| |
| // Try for a third line, which we will promptly discard. |
| if (address_field->address2_ != NULL) { |
| if (is_ecml) { |
| pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|'); |
| ParseText(iter, pattern); |
| } else { |
| pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE); |
| if (!ParseEmptyText(iter, NULL)) |
| ParseText(iter, pattern, NULL); |
| } |
| } |
| |
| return true; |
| } |
| |
| // static |
| bool AddressField::ParseCountry( |
| std::vector<AutofillField*>::const_iterator* iter, |
| bool is_ecml, AddressField* address_field) { |
| // Parse a country. The occasional page (e.g. |
| // Travelocity_New Member Information1.html) calls this a "location". |
| // Note: ECML standard uses 2 letter country code (ISO 3166) |
| if (address_field->country_ && !address_field->country_->IsEmpty()) |
| return false; |
| |
| string16 pattern; |
| if (is_ecml) |
| pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); |
| else |
| pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE); |
| |
| if (!ParseText(iter, pattern, &address_field->country_)) |
| return false; |
| |
| return true; |
| } |
| |
| // static |
| bool AddressField::ParseZipCode( |
| std::vector<AutofillField*>::const_iterator* iter, |
| bool is_ecml, AddressField* address_field) { |
| // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this |
| // is called a "post code". |
| // |
| // HACK: Just for the MapQuest driving directions page we match the |
| // exact name "1z", which MapQuest uses to label its zip code field. |
| // Hopefully before long we'll be smart enough to find the zip code |
| // on that page automatically. |
| if (address_field->zip_) |
| return false; |
| |
| // We may be out of fields. |
| if (!**iter) |
| return false; |
| |
| string16 pattern; |
| if (is_ecml) { |
| pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|'); |
| } else { |
| pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE); |
| } |
| |
| AddressType tempType; |
| string16 name = (**iter)->name; |
| |
| // Note: comparisons using the ecml compliant name as a prefix must be used in |
| // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for |
| // more detail. |
| string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); |
| if (StartsWith(name, bill_to_postal_code_field, false)) { |
| tempType = kBillingAddress; |
| } else if (StartsWith(name, bill_to_postal_code_field, false)) { |
| tempType = kShippingAddress; |
| } else { |
| tempType = kGenericAddress; |
| } |
| |
| if (!ParseText(iter, pattern, &address_field->zip_)) |
| return false; |
| |
| address_field->type_ = tempType; |
| if (!is_ecml) { |
| // Look for a zip+4, whose field name will also often contain |
| // the substring "zip". |
| ParseText(iter, |
| l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE), |
| &address_field->zip4_); |
| } |
| |
| return true; |
| } |
| |
| // static |
| bool AddressField::ParseCity( |
| std::vector<AutofillField*>::const_iterator* iter, |
| bool is_ecml, AddressField* address_field) { |
| // Parse a city name. Some UK pages (e.g. The China Shop2.html) use |
| // the term "town". |
| if (address_field->city_) |
| return false; |
| |
| string16 pattern; |
| if (is_ecml) |
| pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); |
| else |
| pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE); |
| |
| if (!ParseText(iter, pattern, &address_field->city_)) |
| return false; |
| |
| return true; |
| } |
| |
| // static |
| bool AddressField::ParseState( |
| std::vector<AutofillField*>::const_iterator* iter, |
| bool is_ecml, AddressField* address_field) { |
| if (address_field->state_) |
| return false; |
| |
| string16 pattern; |
| if (is_ecml) |
| pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); |
| else |
| pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE); |
| |
| if (!ParseText(iter, pattern, &address_field->state_)) |
| return false; |
| |
| return true; |
| } |
| |
| AddressType AddressField::AddressTypeFromText(const string16 &text) { |
| if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE)) |
| != string16::npos || |
| text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE)) |
| != string16::npos) |
| // This text could be a checkbox label such as "same as my billing |
| // address" or "use my shipping address". |
| // ++ It would help if we generally skipped all text that appears |
| // after a check box. |
| return kGenericAddress; |
| |
| // Not all pages say "billing address" and "shipping address" explicitly; |
| // for example, Craft Catalog1.html has "Bill-to Address" and |
| // "Ship-to Address". |
| size_t bill = text.rfind( |
| l10n_util::GetStringUTF16(IDS_AUTOFILL_BILLING_DESIGNATOR_RE)); |
| size_t ship = text.rfind( |
| l10n_util::GetStringUTF16(IDS_AUTOFILL_SHIPPING_DESIGNATOR_RE)); |
| |
| if (bill == string16::npos && ship == string16::npos) |
| return kGenericAddress; |
| |
| if (bill != string16::npos && ship == string16::npos) |
| return kBillingAddress; |
| |
| if (bill == string16::npos && ship != string16::npos) |
| return kShippingAddress; |
| |
| if (bill > ship) |
| return kBillingAddress; |
| |
| return kShippingAddress; |
| } |