src/com/android/dialer/dialpad/SmartDialNameMatcher.java - platform/packages/apps/Dialer - Git at Google

 /*
  * Copyright (C) 2012 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.android.dialer.dialpad;

 import android.text.TextUtils;

 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;

 import java.util.ArrayList;

 /**
  * {@link #SmartDialNameMatcher} contains utility functions to remove accents from accented
  * characters and normalize a phone number. It also contains the matching logic that determines if
  * a contact's display name matches a numeric query. The boolean variable
  * {@link #ALLOW_INITIAL_MATCH} controls the behavior of the matching logic and determines
  * whether we allow matches like 57 - (J)ohn (S)mith.
  */
 public class SmartDialNameMatcher {

     private final String mQuery;

     /**
      * Lookup table from a lowercase latin letter to the dialpad digit character it is printed
      * on. The table has one entry per letter 'a' through 'z' and is indexed with
      * {@code letter - 'a'}, so the letter must already have been lowercased.
      */
     public static final char[] LATIN_LETTERS_TO_DIGITS = {
         '2', '2', '2', // A,B,C -> 2
         '3', '3', '3', // D,E,F -> 3
         '4', '4', '4', // G,H,I -> 4
         '5', '5', '5', // J,K,L -> 5
         '6', '6', '6', // M,N,O -> 6
         '7', '7', '7', '7', // P,Q,R,S -> 7
         '8', '8', '8', // T,U,V -> 8
         '9', '9', '9', '9' // W,X,Y,Z -> 9
     };

     // Whether or not we allow matches like 57 - (J)ohn (S)mith
     private static final boolean ALLOW_INITIAL_MATCH = true;

     // The maximum length of the initial we will match - typically set to 1 to minimize false
     // positives
     private static final int INITIAL_LENGTH_LIMIT = 1;

     /*
      * Maps a character carrying an accent/diacritic to its plain lowercase latin equivalent,
      * and maps the upper case latin characters 'A'-'Z' to their lower case equivalents. Any
      * character that has no entry in the switch statement is returned unchanged.
      *
      * The switch statement in this function was generated using the python code:
      * from unidecode import unidecode
      * for i in range(192, 564):
      *     char = unichr(i)
      *     decoded = unidecode(char)
      *     # Unicode characters that decompose into multiple characters i.e.
      *     #  into ss are not supported for now
      *     if (len(decoded) == 1 and decoded.isalpha()):
      *         print "case '" + char + "': return '" + unidecode(char) +  "';"
      *
      * This gives us a way to map characters containing accents/diacritics to their
      * alphabetic equivalents. The unidecode library can be found at:
      * http://pypi.python.org/pypi/Unidecode/0.04.1
      *
      * Note that every accented form of the letter Y (including the capital 'Ý') maps to 'y',
      * so that it resolves to the same dialpad digit as a plain 'y'.
      */
     public static char remapAccentedChars(char c) {
         switch (c) {
             case 'À': return 'a';
             case 'Á': return 'a';
             case 'Â': return 'a';
             case 'Ã': return 'a';
             case 'Ä': return 'a';
             case 'Å': return 'a';
             case 'Ç': return 'c';
             case 'È': return 'e';
             case 'É': return 'e';
             case 'Ê': return 'e';
             case 'Ë': return 'e';
             case 'Ì': return 'i';
             case 'Í': return 'i';
             case 'Î': return 'i';
             case 'Ï': return 'i';
             case 'Ð': return 'd';
             case 'Ñ': return 'n';
             case 'Ò': return 'o';
             case 'Ó': return 'o';
             case 'Ô': return 'o';
             case 'Õ': return 'o';
             case 'Ö': return 'o';
             case '×': return 'x';
             case 'Ø': return 'o';
             case 'Ù': return 'u';
             case 'Ú': return 'u';
             case 'Û': return 'u';
             case 'Ü': return 'u';
             // Capital Y with acute: must agree with 'ý' below and map to 'y', not 'u'
             case 'Ý': return 'y';
             case 'à': return 'a';
             case 'á': return 'a';
             case 'â': return 'a';
             case 'ã': return 'a';
             case 'ä': return 'a';
             case 'å': return 'a';
             case 'ç': return 'c';
             case 'è': return 'e';
             case 'é': return 'e';
             case 'ê': return 'e';
             case 'ë': return 'e';
             case 'ì': return 'i';
             case 'í': return 'i';
             case 'î': return 'i';
             case 'ï': return 'i';
             case 'ð': return 'd';
             case 'ñ': return 'n';
             case 'ò': return 'o';
             case 'ó': return 'o';
             case 'ô': return 'o';
             case 'õ': return 'o';
             case 'ö': return 'o';
             case 'ø': return 'o';
             case 'ù': return 'u';
             case 'ú': return 'u';
             case 'û': return 'u';
             case 'ü': return 'u';
             case 'ý': return 'y';
             case 'ÿ': return 'y';
             case 'Ā': return 'a';
             case 'ā': return 'a';
             case 'Ă': return 'a';
             case 'ă': return 'a';
             case 'Ą': return 'a';
             case 'ą': return 'a';
             case 'Ć': return 'c';
             case 'ć': return 'c';
             case 'Ĉ': return 'c';
             case 'ĉ': return 'c';
             case 'Ċ': return 'c';
             case 'ċ': return 'c';
             case 'Č': return 'c';
             case 'č': return 'c';
             case 'Ď': return 'd';
             case 'ď': return 'd';
             case 'Đ': return 'd';
             case 'đ': return 'd';
             case 'Ē': return 'e';
             case 'ē': return 'e';
             case 'Ĕ': return 'e';
             case 'ĕ': return 'e';
             case 'Ė': return 'e';
             case 'ė': return 'e';
             case 'Ę': return 'e';
             case 'ę': return 'e';
             case 'Ě': return 'e';
             case 'ě': return 'e';
             case 'Ĝ': return 'g';
             case 'ĝ': return 'g';
             case 'Ğ': return 'g';
             case 'ğ': return 'g';
             case 'Ġ': return 'g';
             case 'ġ': return 'g';
             case 'Ģ': return 'g';
             case 'ģ': return 'g';
             case 'Ĥ': return 'h';
             case 'ĥ': return 'h';
             case 'Ħ': return 'h';
             case 'ħ': return 'h';
             case 'Ĩ': return 'i';
             case 'ĩ': return 'i';
             case 'Ī': return 'i';
             case 'ī': return 'i';
             case 'Ĭ': return 'i';
             case 'ĭ': return 'i';
             case 'Į': return 'i';
             case 'į': return 'i';
             case 'İ': return 'i';
             case 'ı': return 'i';
             case 'Ĵ': return 'j';
             case 'ĵ': return 'j';
             case 'Ķ': return 'k';
             case 'ķ': return 'k';
             case 'ĸ': return 'k';
             case 'Ĺ': return 'l';
             case 'ĺ': return 'l';
             case 'Ļ': return 'l';
             case 'ļ': return 'l';
             case 'Ľ': return 'l';
             case 'ľ': return 'l';
             case 'Ŀ': return 'l';
             case 'ŀ': return 'l';
             case 'Ł': return 'l';
             case 'ł': return 'l';
             case 'Ń': return 'n';
             case 'ń': return 'n';
             case 'Ņ': return 'n';
             case 'ņ': return 'n';
             case 'Ň': return 'n';
             case 'ň': return 'n';
             case 'Ō': return 'o';
             case 'ō': return 'o';
             case 'Ŏ': return 'o';
             case 'ŏ': return 'o';
             case 'Ő': return 'o';
             case 'ő': return 'o';
             case 'Ŕ': return 'r';
             case 'ŕ': return 'r';
             case 'Ŗ': return 'r';
             case 'ŗ': return 'r';
             case 'Ř': return 'r';
             case 'ř': return 'r';
             case 'Ś': return 's';
             case 'ś': return 's';
             case 'Ŝ': return 's';
             case 'ŝ': return 's';
             case 'Ş': return 's';
             case 'ş': return 's';
             case 'Š': return 's';
             case 'š': return 's';
             case 'Ţ': return 't';
             case 'ţ': return 't';
             case 'Ť': return 't';
             case 'ť': return 't';
             case 'Ŧ': return 't';
             case 'ŧ': return 't';
             case 'Ũ': return 'u';
             case 'ũ': return 'u';
             case 'Ū': return 'u';
             case 'ū': return 'u';
             case 'Ŭ': return 'u';
             case 'ŭ': return 'u';
             case 'Ů': return 'u';
             case 'ů': return 'u';
             case 'Ű': return 'u';
             case 'ű': return 'u';
             case 'Ų': return 'u';
             case 'ų': return 'u';
             case 'Ŵ': return 'w';
             case 'ŵ': return 'w';
             case 'Ŷ': return 'y';
             case 'ŷ': return 'y';
             case 'Ÿ': return 'y';
             case 'Ź': return 'z';
             case 'ź': return 'z';
             case 'Ż': return 'z';
             case 'ż': return 'z';
             case 'Ž': return 'z';
             case 'ž': return 'z';
             case 'ſ': return 's';
             case 'ƀ': return 'b';
             case 'Ɓ': return 'b';
             case 'Ƃ': return 'b';
             case 'ƃ': return 'b';
             case 'Ɔ': return 'o';
             case 'Ƈ': return 'c';
             case 'ƈ': return 'c';
             case 'Ɖ': return 'd';
             case 'Ɗ': return 'd';
             case 'Ƌ': return 'd';
             case 'ƌ': return 'd';
             case 'ƍ': return 'd';
             case 'Ɛ': return 'e';
             case 'Ƒ': return 'f';
             case 'ƒ': return 'f';
             case 'Ɠ': return 'g';
             case 'Ɣ': return 'g';
             case 'Ɩ': return 'i';
             case 'Ɨ': return 'i';
             case 'Ƙ': return 'k';
             case 'ƙ': return 'k';
             case 'ƚ': return 'l';
             case 'ƛ': return 'l';
             case 'Ɯ': return 'w';
             case 'Ɲ': return 'n';
             case 'ƞ': return 'n';
             case 'Ɵ': return 'o';
             case 'Ơ': return 'o';
             case 'ơ': return 'o';
             case 'Ƥ': return 'p';
             case 'ƥ': return 'p';
             case 'ƫ': return 't';
             case 'Ƭ': return 't';
             case 'ƭ': return 't';
             case 'Ʈ': return 't';
             case 'Ư': return 'u';
             case 'ư': return 'u';
             case 'Ʊ': return 'y';
             case 'Ʋ': return 'v';
             case 'Ƴ': return 'y';
             case 'ƴ': return 'y';
             case 'Ƶ': return 'z';
             case 'ƶ': return 'z';
             case 'ƿ': return 'w';
             case 'Ǎ': return 'a';
             case 'ǎ': return 'a';
             case 'Ǐ': return 'i';
             case 'ǐ': return 'i';
             case 'Ǒ': return 'o';
             case 'ǒ': return 'o';
             case 'Ǔ': return 'u';
             case 'ǔ': return 'u';
             case 'Ǖ': return 'u';
             case 'ǖ': return 'u';
             case 'Ǘ': return 'u';
             case 'ǘ': return 'u';
             case 'Ǚ': return 'u';
             case 'ǚ': return 'u';
             case 'Ǜ': return 'u';
             case 'ǜ': return 'u';
             case 'Ǟ': return 'a';
             case 'ǟ': return 'a';
             case 'Ǡ': return 'a';
             case 'ǡ': return 'a';
             case 'Ǥ': return 'g';
             case 'ǥ': return 'g';
             case 'Ǧ': return 'g';
             case 'ǧ': return 'g';
             case 'Ǩ': return 'k';
             case 'ǩ': return 'k';
             case 'Ǫ': return 'o';
             case 'ǫ': return 'o';
             case 'Ǭ': return 'o';
             case 'ǭ': return 'o';
             case 'ǰ': return 'j';
             case 'ǲ': return 'd';
             case 'Ǵ': return 'g';
             case 'ǵ': return 'g';
             case 'Ƿ': return 'w';
             case 'Ǹ': return 'n';
             case 'ǹ': return 'n';
             case 'Ǻ': return 'a';
             case 'ǻ': return 'a';
             case 'Ǿ': return 'o';
             case 'ǿ': return 'o';
             case 'Ȁ': return 'a';
             case 'ȁ': return 'a';
             case 'Ȃ': return 'a';
             case 'ȃ': return 'a';
             case 'Ȅ': return 'e';
             case 'ȅ': return 'e';
             case 'Ȇ': return 'e';
             case 'ȇ': return 'e';
             case 'Ȉ': return 'i';
             case 'ȉ': return 'i';
             case 'Ȋ': return 'i';
             case 'ȋ': return 'i';
             case 'Ȍ': return 'o';
             case 'ȍ': return 'o';
             case 'Ȏ': return 'o';
             case 'ȏ': return 'o';
             case 'Ȑ': return 'r';
             case 'ȑ': return 'r';
             case 'Ȓ': return 'r';
             case 'ȓ': return 'r';
             case 'Ȕ': return 'u';
             case 'ȕ': return 'u';
             case 'Ȗ': return 'u';
             case 'ȗ': return 'u';
             case 'Ș': return 's';
             case 'ș': return 's';
             case 'Ț': return 't';
             case 'ț': return 't';
             case 'Ȝ': return 'y';
             case 'ȝ': return 'y';
             case 'Ȟ': return 'h';
             case 'ȟ': return 'h';
             case 'Ȥ': return 'z';
             case 'ȥ': return 'z';
             case 'Ȧ': return 'a';
             case 'ȧ': return 'a';
             case 'Ȩ': return 'e';
             case 'ȩ': return 'e';
             case 'Ȫ': return 'o';
             case 'ȫ': return 'o';
             case 'Ȭ': return 'o';
             case 'ȭ': return 'o';
             case 'Ȯ': return 'o';
             case 'ȯ': return 'o';
             case 'Ȱ': return 'o';
             case 'ȱ': return 'o';
             case 'Ȳ': return 'y';
             case 'ȳ': return 'y';
             case 'A': return 'a';
             case 'B': return 'b';
             case 'C': return 'c';
             case 'D': return 'd';
             case 'E': return 'e';
             case 'F': return 'f';
             case 'G': return 'g';
             case 'H': return 'h';
             case 'I': return 'i';
             case 'J': return 'j';
             case 'K': return 'k';
             case 'L': return 'l';
             case 'M': return 'm';
             case 'N': return 'n';
             case 'O': return 'o';
             case 'P': return 'p';
             case 'Q': return 'q';
             case 'R': return 'r';
             case 'S': return 's';
             case 'T': return 't';
             case 'U': return 'u';
             case 'V': return 'v';
             case 'W': return 'w';
             case 'X': return 'x';
             case 'Y': return 'y';
             case 'Z': return 'z';
             default: return c;
         }
     }

     private final ArrayList<SmartDialMatchPosition> mMatchPositions = Lists.newArrayList();

     /**
      * Creates a matcher bound to a single query.
      *
      * @param query The query that {@link #matches(String)} tests display names against. It is
      * stored as given and handed back by {@link #getQuery()}.
      */
     public SmartDialNameMatcher(String query) {
         this.mQuery = query;
     }

     /**
      * Reduces a phone number to its digits by dropping every character outside '0'-'9'
      * (spaces, dashes, parentheses, plus signs, letters, etc.).
      *
      * @param number Phone number we want to normalize
      * @return The characters of {@code number} that lie in the range '0'-'9', in their
      *         original order
      */
     public static String normalizeNumber(String number) {
         final int length = number.length();
         final StringBuilder digits = new StringBuilder();
         int index = 0;
         while (index < length) {
             final char candidate = number.charAt(index);
             index++;
             final boolean isAsciiDigit = '0' <= candidate && candidate <= '9';
             if (isAsciiDigit) {
                 digits.append(candidate);
             }
         }
         return digits.toString();
     }

     /**
      * Matches a phone number against a query, taking care of formatting characters.
      *
      * The digits of {@code phoneNumber} are compared, in order, against the characters of
      * {@code query}; every non-digit character in the phone number is skipped. The query has to
      * be consumed completely for the call to succeed, so a phone number that contains fewer
      * digits than the query is not a match.
      *
      * @param phoneNumber - Raw phone number
      * @param query - Normalized query (only contains numbers from 0-9)
      * @return {@literal null} if either argument is empty or the number and the query don't
      *         match; otherwise a SmartDialMatchPosition constructed with 0 and the index just
      *         past the last matched digit in {@code phoneNumber}
      */
     public SmartDialMatchPosition matchesNumber(String phoneNumber, String query) {
         if (TextUtils.isEmpty(phoneNumber) || TextUtils.isEmpty(query)) {
             return null;
         }
         final int queryLength = query.length();
         int queryAt = 0;
         for (int i = 0; i < phoneNumber.length(); i++) {
             final char ch = phoneNumber.charAt(i);
             if (ch < '0' || ch > '9') {
                 // Formatting character (space, dash, parenthesis, ...): it is part of the
                 // matched range but does not consume anything from the query
                 continue;
             }
             if (ch != query.charAt(queryAt)) {
                 return null;
             }
             queryAt++;
             if (queryAt == queryLength) {
                 // The whole query has been matched; the range ends right after this digit
                 return new SmartDialMatchPosition(0, i + 1);
             }
         }
         // The phone number ran out of digits before the whole query was matched
         return null;
     }

     /**
      * This function iterates through each token in the display name, trying to match the query
      * to the numeric equivalent of the token.
      *
      * A token is defined as a range in the display name delimited by characters that have no
      * latin alphabet equivalents (e.g. spaces - ' ', periods - '.', underscores - '_' or chinese
      * characters - '王'). Transliteration from non-latin characters to latin character will be
      * done on a best effort basis - e.g. 'Ü' - 'u'.
      *
      * For example,
      * the display name "Phillips Thomas Jr" contains three tokens: "phillips", "thomas", and "jr".
      *
      * A match must begin at the start of a token.
      * For example, typing 846(Tho) would match "Phillips Thomas", but 466(hom) would not.
      *
      * Also, a match can extend across tokens.
      * For example, typing 37337(FredS) would match (Fred S)mith.
      *
      * If a match that extends across tokens fails on the very first character of a token, the
      * query is restarted at that token rather than skipping it, so that typing 23(Ad) still
      * matches "A (Ad)".
      *
      * @param displayName The display name we intend to match against. Accented and upper case
      * characters are remapped while matching.
      * @param query The string of digits that we want to match the display name to.
      * @param matchList An array list of {@link SmartDialMatchPosition}s that we add matched
      * positions to.
      * @return Returns true if a combination of the tokens in displayName match the query
      * string contained in query. If the function returns true, matchList will contain an
      * ArrayList of match positions (multiple matches correspond to initial matches).
      */
     @VisibleForTesting
     boolean matchesCombination(String displayName, String query,
             ArrayList<SmartDialMatchPosition> matchList) {
         final int nameLength = displayName.length();
         final int queryLength = query.length();

         if (nameLength < queryLength) {
             return false;
         }

         if (queryLength == 0) {
             return false;
         }

         // The current character index in displayName
         // E.g. 3 corresponds to 'd' in "Fred Smith"
         int nameStart = 0;

         // The current character in the query we are trying to match the displayName against
         int queryStart = 0;

         // The start position of the current token we are inspecting
         int tokenStart = 0;

         // The number of non-alphabetic characters we've encountered so far in the current match.
         // E.g. if we've currently matched 3733764849 to (Fred Smith W)illiam, then the
         // separatorCount should be 2. This allows us to correctly calculate offsets for the match
         // positions
         int separatorCount = 0;

         ArrayList<SmartDialMatchPosition> partial = new ArrayList<SmartDialMatchPosition>();
         // Keep going until we reach the end of displayName
         while (nameStart < nameLength && queryStart < queryLength) {
             char ch = displayName.charAt(nameStart);
             // Strip diacritics from accented characters if any
             ch = remapAccentedChars(ch);
             if (isLowercaseLatin(ch)) {
                 // a starts at index 0
                 if (LATIN_LETTERS_TO_DIGITS[ch - 'a'] != query.charAt(queryStart)) {
                     // We did not find a match. There are three possible situations:
                     // 1. The first character of the query failed to match.
                     // 2. Earlier query characters matched, and the failure happened in the
                     //    middle of a token.
                     // 3. Earlier query characters matched, and the failure happened on the
                     //    first character of a token (the match had extended across a separator).
                     // In cases 1 and 2 the current token cannot match, so skip to the next one.
                     // In case 3 the current token has not been tried from its own start yet, so
                     // stay on it and retry with the query restarted.
                     if (queryStart == 0 || isLowercaseLatin(
                             remapAccentedChars(displayName.charAt(nameStart - 1)))) {
                         while (nameStart < nameLength &&
                                 isLowercaseLatin(remapAccentedChars(displayName.charAt(nameStart)))) {
                             nameStart++;
                         }
                         // Step over the separator that ended the token
                         nameStart++;
                     }
                     // Restart the query at the new token position
                     queryStart = 0;
                     separatorCount = 0;
                     tokenStart = nameStart;
                 } else {
                     if (queryStart == queryLength - 1) {

                         // As much as possible, we prioritize a full token match over a sub token
                         // one so if we find a full token match, we can return right away
                         matchList.add(new SmartDialMatchPosition(
                                 tokenStart, queryLength + tokenStart + separatorCount));
                         return true;
                     } else if (ALLOW_INITIAL_MATCH && queryStart < INITIAL_LENGTH_LIMIT) {
                         // we matched the first character.
                         // branch off and see if we can find another match with the remaining
                         // characters in the query string and the remaining tokens
                         // find the next separator in the display name
                         int j;
                         for (j = nameStart; j < nameLength; j++) {
                             if (!isLowercaseLatin(remapAccentedChars(displayName.charAt(j)))) {
                                 break;
                             }
                         }
                         // this means there is at least one character left after the separator
                         if (j < nameLength - 1) {
                             final String remainder = displayName.substring(j + 1);
                             final ArrayList<SmartDialMatchPosition> partialTemp =
                                     Lists.newArrayList();
                             if (matchesCombination(
                                     remainder, query.substring(queryStart + 1), partialTemp)) {

                                 // store the list of possible match positions
                                 SmartDialMatchPosition.advanceMatchPositions(partialTemp, j + 1);
                                 partialTemp.add(0,
                                         new SmartDialMatchPosition(nameStart, nameStart + 1));
                                 // we found a partial token match, store the data in a
                                 // temp buffer and return it if we end up not finding a full
                                 // token match
                                 partial = partialTemp;
                             }
                         }
                     }
                     nameStart++;
                     queryStart++;
                     // we matched the current character in the name against one in the query,
                     // continue and see if the rest of the characters match
                 }
             } else {
                 // found a separator, we skip this character and continue to the next one
                 nameStart++;
                 if (queryStart == 0) {
                     // This means we found a separator before the start of a token,
                     // so we should increment the token's start position to reflect its true
                     // start position
                     tokenStart = nameStart;
                 } else {
                     // Otherwise this separator was found in the middle of a token being matched,
                     // so increase the separator count
                     separatorCount++;
                 }
             }
         }
         // if we have no complete match at this point, then we attempt to fall back to the partial
         // token match(if any). If we don't allow initial matching (ALLOW_INITIAL_MATCH = false)
         // then partial will always be empty.
         if (!partial.isEmpty()) {
             matchList.addAll(partial);
             return true;
         }
         return false;
     }

     /*
      * Reports whether the character lies in the range 'a' through 'z'. The matching logic
      * treats every character outside that range as a separator.
      */
     private boolean isLowercaseLatin(char ch) {
         if (ch < 'a') {
             return false;
         }
         return ch <= 'z';
     }

     /**
      * Tests a display name against the query this matcher was constructed with. Match positions
      * recorded by an earlier call are discarded before matching, and the positions found by
      * this call can be read afterwards through {@link #getMatchPositions()}.
      *
      * @param displayName The display name to test
      * @return The result of {@link #matchesCombination} for the display name and the query
      */
     public boolean matches(String displayName) {
         mMatchPositions.clear();
         final boolean matched = matchesCombination(displayName, mQuery, mMatchPositions);
         return matched;
     }

     /**
      * Returns the match positions recorded by the most recent call to {@link #matches(String)}.
      *
      * @return A new list holding the same elements as the internal list
      */
     public ArrayList<SmartDialMatchPosition> getMatchPositions() {
         // Hand out a copy rather than mMatchPositions itself, so the caller's list does not
         // change when this matcher is used again
         final ArrayList<SmartDialMatchPosition> snapshot =
                 new ArrayList<SmartDialMatchPosition>();
         snapshot.addAll(mMatchPositions);
         return snapshot;
     }

     /**
      * @return The query string this matcher was constructed with
      */
     public String getQuery() {
         return this.mQuery;
     }
 }
	/*
	* Copyright (C) 2012 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.android.dialer.dialpad;

	import android.text.TextUtils;

	import com.google.common.annotations.VisibleForTesting;
	import com.google.common.collect.Lists;

	import java.util.ArrayList;

	/**
	* {@link #SmartDialNameMatcher} contains utility functions to remove accents from accented
	* characters and normalize a phone number. It also contains the matching logic that determines if
	* a contact's display name matches a numeric query. The boolean variable
	* {@link #ALLOW_INITIAL_MATCH} controls the behavior of the matching logic and determines
	* whether we allow matches like 57 - (J)ohn (S)mith.
	*/
	public class SmartDialNameMatcher {

	private final String mQuery;

	public static final char[] LATIN_LETTERS_TO_DIGITS = {
	'2', '2', '2', // A,B,C -> 2
	'3', '3', '3', // D,E,F -> 3
	'4', '4', '4', // G,H,I -> 4
	'5', '5', '5', // J,K,L -> 5
	'6', '6', '6', // M,N,O -> 6
	'7', '7', '7', '7', // P,Q,R,S -> 7
	'8', '8', '8', // T,U,V -> 8
	'9', '9', '9', '9' // W,X,Y,Z -> 9
	};

	// Whether or not we allow matches like 57 - (J)ohn (S)mith
	private static final boolean ALLOW_INITIAL_MATCH = true;

	// The maximum length of the initial we will match - typically set to 1 to minimize false
	// positives
	private static final int INITIAL_LENGTH_LIMIT = 1;

	/*
	* The switch statement in this function was generated using the python code:
	* from unidecode import unidecode
	* for i in range(192, 564):
	* char = unichr(i)
	* decoded = unidecode(char)
	* # Unicode characters that decompose into multiple characters i.e.
	* # into ss are not supported for now
	* if (len(decoded) == 1 and decoded.isalpha()):
	* print "case '" + char + "': return '" + unidecode(char) + "';"
	*
	* This gives us a way to map characters containing accents/diacritics to their
	* alphabetic equivalents. The unidecode library can be found at:
	* http://pypi.python.org/pypi/Unidecode/0.04.1
	*
	* Also remaps all upper case latin characters to their lower case equivalents.
	*/
	public static char remapAccentedChars(char c) {
	switch (c) {
	case 'À': return 'a';
	case 'Á': return 'a';
	case 'Â': return 'a';
	case 'Ã': return 'a';
	case 'Ä': return 'a';
	case 'Å': return 'a';
	case 'Ç': return 'c';
	case 'È': return 'e';
	case 'É': return 'e';
	case 'Ê': return 'e';
	case 'Ë': return 'e';
	case 'Ì': return 'i';
	case 'Í': return 'i';
	case 'Î': return 'i';
	case 'Ï': return 'i';
	case 'Ð': return 'd';
	case 'Ñ': return 'n';
	case 'Ò': return 'o';
	case 'Ó': return 'o';
	case 'Ô': return 'o';
	case 'Õ': return 'o';
	case 'Ö': return 'o';
	case '×': return 'x';
	case 'Ø': return 'o';
	case 'Ù': return 'u';
	case 'Ú': return 'u';
	case 'Û': return 'u';
	case 'Ü': return 'u';
	case 'Ý': return 'u';
	case 'à': return 'a';
	case 'á': return 'a';
	case 'â': return 'a';
	case 'ã': return 'a';
	case 'ä': return 'a';
	case 'å': return 'a';
	case 'ç': return 'c';
	case 'è': return 'e';
	case 'é': return 'e';
	case 'ê': return 'e';
	case 'ë': return 'e';
	case 'ì': return 'i';
	case 'í': return 'i';
	case 'î': return 'i';
	case 'ï': return 'i';
	case 'ð': return 'd';
	case 'ñ': return 'n';
	case 'ò': return 'o';
	case 'ó': return 'o';
	case 'ô': return 'o';
	case 'õ': return 'o';
	case 'ö': return 'o';
	case 'ø': return 'o';
	case 'ù': return 'u';
	case 'ú': return 'u';
	case 'û': return 'u';
	case 'ü': return 'u';
	case 'ý': return 'y';
	case 'ÿ': return 'y';
	case 'Ā': return 'a';
	case 'ā': return 'a';
	case 'Ă': return 'a';
	case 'ă': return 'a';
	case 'Ą': return 'a';
	case 'ą': return 'a';
	case 'Ć': return 'c';
	case 'ć': return 'c';
	case 'Ĉ': return 'c';
	case 'ĉ': return 'c';
	case 'Ċ': return 'c';
	case 'ċ': return 'c';
	case 'Č': return 'c';
	case 'č': return 'c';
	case 'Ď': return 'd';
	case 'ď': return 'd';
	case 'Đ': return 'd';
	case 'đ': return 'd';
	case 'Ē': return 'e';
	case 'ē': return 'e';
	case 'Ĕ': return 'e';
	case 'ĕ': return 'e';
	case 'Ė': return 'e';
	case 'ė': return 'e';
	case 'Ę': return 'e';
	case 'ę': return 'e';
	case 'Ě': return 'e';
	case 'ě': return 'e';
	case 'Ĝ': return 'g';
	case 'ĝ': return 'g';
	case 'Ğ': return 'g';
	case 'ğ': return 'g';
	case 'Ġ': return 'g';
	case 'ġ': return 'g';
	case 'Ģ': return 'g';
	case 'ģ': return 'g';
	case 'Ĥ': return 'h';
	case 'ĥ': return 'h';
	case 'Ħ': return 'h';
	case 'ħ': return 'h';
	case 'Ĩ': return 'i';
	case 'ĩ': return 'i';
	case 'Ī': return 'i';
	case 'ī': return 'i';
	case 'Ĭ': return 'i';
	case 'ĭ': return 'i';
	case 'Į': return 'i';
	case 'į': return 'i';
	case 'İ': return 'i';
	case 'ı': return 'i';
	case 'Ĵ': return 'j';
	case 'ĵ': return 'j';
	case 'Ķ': return 'k';
	case 'ķ': return 'k';
	case 'ĸ': return 'k';
	case 'Ĺ': return 'l';
	case 'ĺ': return 'l';
	case 'Ļ': return 'l';
	case 'ļ': return 'l';
	case 'Ľ': return 'l';
	case 'ľ': return 'l';
	case 'Ŀ': return 'l';
	case 'ŀ': return 'l';
	case 'Ł': return 'l';
	case 'ł': return 'l';
	case 'Ń': return 'n';
	case 'ń': return 'n';
	case 'Ņ': return 'n';
	case 'ņ': return 'n';
	case 'Ň': return 'n';
	case 'ň': return 'n';
	case 'Ō': return 'o';
	case 'ō': return 'o';
	case 'Ŏ': return 'o';
	case 'ŏ': return 'o';
	case 'Ő': return 'o';
	case 'ő': return 'o';
	case 'Ŕ': return 'r';
	case 'ŕ': return 'r';
	case 'Ŗ': return 'r';
	case 'ŗ': return 'r';
	case 'Ř': return 'r';
	case 'ř': return 'r';
	case 'Ś': return 's';
	case 'ś': return 's';
	case 'Ŝ': return 's';
	case 'ŝ': return 's';
	case 'Ş': return 's';
	case 'ş': return 's';
	case 'Š': return 's';
	case 'š': return 's';
	case 'Ţ': return 't';
	case 'ţ': return 't';
	case 'Ť': return 't';
	case 'ť': return 't';
	case 'Ŧ': return 't';
	case 'ŧ': return 't';
	case 'Ũ': return 'u';
	case 'ũ': return 'u';
	case 'Ū': return 'u';
	case 'ū': return 'u';
	case 'Ŭ': return 'u';
	case 'ŭ': return 'u';
	case 'Ů': return 'u';
	case 'ů': return 'u';
	case 'Ű': return 'u';
	case 'ű': return 'u';
	case 'Ų': return 'u';
	case 'ų': return 'u';
	case 'Ŵ': return 'w';
	case 'ŵ': return 'w';
	case 'Ŷ': return 'y';
	case 'ŷ': return 'y';
	case 'Ÿ': return 'y';
	case 'Ź': return 'z';
	case 'ź': return 'z';
	case 'Ż': return 'z';
	case 'ż': return 'z';
	case 'Ž': return 'z';
	case 'ž': return 'z';
	case 'ſ': return 's';
	case 'ƀ': return 'b';
	case 'Ɓ': return 'b';
	case 'Ƃ': return 'b';
	case 'ƃ': return 'b';
	case 'Ɔ': return 'o';
	case 'Ƈ': return 'c';
	case 'ƈ': return 'c';
	case 'Ɖ': return 'd';
	case 'Ɗ': return 'd';
	case 'Ƌ': return 'd';
	case 'ƌ': return 'd';
	case 'ƍ': return 'd';
	case 'Ɛ': return 'e';
	case 'Ƒ': return 'f';
	case 'ƒ': return 'f';
	case 'Ɠ': return 'g';
	case 'Ɣ': return 'g';
	case 'Ɩ': return 'i';
	case 'Ɨ': return 'i';
	case 'Ƙ': return 'k';
	case 'ƙ': return 'k';
	case 'ƚ': return 'l';
	case 'ƛ': return 'l';
	case 'Ɯ': return 'w';
	case 'Ɲ': return 'n';
	case 'ƞ': return 'n';
	case 'Ɵ': return 'o';
	case 'Ơ': return 'o';
	case 'ơ': return 'o';
	case 'Ƥ': return 'p';
	case 'ƥ': return 'p';
	case 'ƫ': return 't';
	case 'Ƭ': return 't';
	case 'ƭ': return 't';
	case 'Ʈ': return 't';
	case 'Ư': return 'u';
	case 'ư': return 'u';
	case 'Ʊ': return 'y';
	case 'Ʋ': return 'v';
	case 'Ƴ': return 'y';
	case 'ƴ': return 'y';
	case 'Ƶ': return 'z';
	case 'ƶ': return 'z';
	case 'ƿ': return 'w';
	case 'Ǎ': return 'a';
	case 'ǎ': return 'a';
	case 'Ǐ': return 'i';
	case 'ǐ': return 'i';
	case 'Ǒ': return 'o';
	case 'ǒ': return 'o';
	case 'Ǔ': return 'u';
	case 'ǔ': return 'u';
	case 'Ǖ': return 'u';
	case 'ǖ': return 'u';
	case 'Ǘ': return 'u';
	case 'ǘ': return 'u';
	case 'Ǚ': return 'u';
	case 'ǚ': return 'u';
	case 'Ǜ': return 'u';
	case 'ǜ': return 'u';
	case 'Ǟ': return 'a';
	case 'ǟ': return 'a';
	case 'Ǡ': return 'a';
	case 'ǡ': return 'a';
	case 'Ǥ': return 'g';
	case 'ǥ': return 'g';
	case 'Ǧ': return 'g';
	case 'ǧ': return 'g';
	case 'Ǩ': return 'k';
	case 'ǩ': return 'k';
	case 'Ǫ': return 'o';
	case 'ǫ': return 'o';
	case 'Ǭ': return 'o';
	case 'ǭ': return 'o';
	case 'ǰ': return 'j';
	case 'ǲ': return 'd';
	case 'Ǵ': return 'g';
	case 'ǵ': return 'g';
	case 'Ƿ': return 'w';
	case 'Ǹ': return 'n';
	case 'ǹ': return 'n';
	case 'Ǻ': return 'a';
	case 'ǻ': return 'a';
	case 'Ǿ': return 'o';
	case 'ǿ': return 'o';
	case 'Ȁ': return 'a';
	case 'ȁ': return 'a';
	case 'Ȃ': return 'a';
	case 'ȃ': return 'a';
	case 'Ȅ': return 'e';
	case 'ȅ': return 'e';
	case 'Ȇ': return 'e';
	case 'ȇ': return 'e';
	case 'Ȉ': return 'i';
	case 'ȉ': return 'i';
	case 'Ȋ': return 'i';
	case 'ȋ': return 'i';
	case 'Ȍ': return 'o';
	case 'ȍ': return 'o';
	case 'Ȏ': return 'o';
	case 'ȏ': return 'o';
	case 'Ȑ': return 'r';
	case 'ȑ': return 'r';
	case 'Ȓ': return 'r';
	case 'ȓ': return 'r';
	case 'Ȕ': return 'u';
	case 'ȕ': return 'u';
	case 'Ȗ': return 'u';
	case 'ȗ': return 'u';
	case 'Ș': return 's';
	case 'ș': return 's';
	case 'Ț': return 't';
	case 'ț': return 't';
	case 'Ȝ': return 'y';
	case 'ȝ': return 'y';
	case 'Ȟ': return 'h';
	case 'ȟ': return 'h';
	case 'Ȥ': return 'z';
	case 'ȥ': return 'z';
	case 'Ȧ': return 'a';
	case 'ȧ': return 'a';
	case 'Ȩ': return 'e';
	case 'ȩ': return 'e';
	case 'Ȫ': return 'o';
	case 'ȫ': return 'o';
	case 'Ȭ': return 'o';
	case 'ȭ': return 'o';
	case 'Ȯ': return 'o';
	case 'ȯ': return 'o';
	case 'Ȱ': return 'o';
	case 'ȱ': return 'o';
	case 'Ȳ': return 'y';
	case 'ȳ': return 'y';
	case 'A': return 'a';
	case 'B': return 'b';
	case 'C': return 'c';
	case 'D': return 'd';
	case 'E': return 'e';
	case 'F': return 'f';
	case 'G': return 'g';
	case 'H': return 'h';
	case 'I': return 'i';
	case 'J': return 'j';
	case 'K': return 'k';
	case 'L': return 'l';
	case 'M': return 'm';
	case 'N': return 'n';
	case 'O': return 'o';
	case 'P': return 'p';
	case 'Q': return 'q';
	case 'R': return 'r';
	case 'S': return 's';
	case 'T': return 't';
	case 'U': return 'u';
	case 'V': return 'v';
	case 'W': return 'w';
	case 'X': return 'x';
	case 'Y': return 'y';
	case 'Z': return 'z';
	default: return c;
	}
	}

	// Match positions filled in by matches(): that method clears this list and then passes it to
	// matchesCombination() as the output list. getMatchPositions() returns a copy of it.
	private final ArrayList<SmartDialMatchPosition> mMatchPositions = Lists.newArrayList();

	/**
	 * Creates a matcher bound to a single query.
	 *
	 * @param query The query that {@link #matches(String)} compares display names against and
	 *         that {@link #getQuery()} returns.
	 */
	public SmartDialNameMatcher(String query) {
		this.mQuery = query;
	}

	/**
	 * Strips a phone number of unnecessary characters (spaces, dashes, etc.), keeping only the
	 * ASCII digits in their original order.
	 *
	 * @param number Phone number we want to normalize; may be {@code null}
	 * @return Phone number consisting of digits from 0-9, or an empty string if {@code number}
	 *         is {@code null} or contains no digits
	 */
	public static String normalizeNumber(String number) {
		// Treat a missing number like an empty one instead of failing with a
		// NullPointerException.
		if (number == null) {
			return "";
		}
		final int length = number.length();
		// The result can never be longer than the input, so size the buffer once up front.
		final StringBuilder digits = new StringBuilder(length);
		for (int i = 0; i < length; i++) {
			final char ch = number.charAt(i);
			if (ch >= '0' && ch <= '9') {
				digits.append(ch);
			}
		}
		return digits.toString();
	}

	/**
	* Matches a phone number against a query, taking care of formatting characters
	*
	* @param phoneNumber - Raw phone number
	* @param query - Normalized query (only contains numbers from 0-9)
	* @return {@literal null} if the number and the query don't match, a valid
	* SmartDialMatchPosition with the matching positions otherwise
	*/
	public SmartDialMatchPosition matchesNumber(String phoneNumber, String query) {
	if (TextUtils.isEmpty(phoneNumber) \|\| TextUtils.isEmpty(query)) {
	return null;
	}
	int queryAt = 0;
	int numberAt = 0;
	for (int i = 0; i < phoneNumber.length(); i++) {
	if (queryAt == query.length()) {
	break;
	}
	char ch = phoneNumber.charAt(i);
	if (ch >= '0' && ch <= '9') {
	if (ch != query.charAt(queryAt)) {
	return null;
	}
	queryAt++;
	numberAt++;
	} else {
	numberAt++;
	continue;
	}
	}
	return new SmartDialMatchPosition(0, numberAt);
	}

	/**
	 * This function iterates through each token in the display name, trying to match the query
	 * to the numeric equivalent of the token.
	 *
	 * A token is defined as a range in the display name delimited by characters that have no
	 * latin alphabet equivalents (e.g. spaces - ' ', periods - '.', underscores - '_' or Chinese
	 * characters - '王'). Transliteration from non-latin characters to latin character will be
	 * done on a best effort basis - e.g. 'Ü' - 'u'.
	 *
	 * For example,
	 * the display name "Phillips Thomas Jr" contains three tokens: "phillips", "thomas", and "jr".
	 *
	 * A match must begin at the start of a token.
	 * For example, typing 846(Tho) would match "Phillips Thomas", but 466(hom) would not.
	 *
	 * Also, a match can extend across tokens.
	 * For example, typing 37337(FredS) would match (Fred S)mith.
	 *
	 * If a match that extends across tokens fails on the first letter of a later token, the query
	 * is retried from the start of that token. For example, typing 254(Ali) against "Al Alice"
	 * first matches "Al", fails on the 'A' of "Alice", and then matches (Ali)ce.
	 *
	 * @param displayName The display name we intend to match against. Every character is passed
	 *         through remapAccentedChars before it is compared.
	 * @param query The string of digits that we want to match the display name to.
	 * @param matchList An array list of {@link SmartDialMatchPosition}s that we add matched
	 *         positions to.
	 * @return Returns true if a combination of the tokens in displayName match the query
	 *         string contained in query. If the function returns true, matchList will contain an
	 *         ArrayList of match positions (multiple matches correspond to initial matches).
	 */
	@VisibleForTesting
	boolean matchesCombination(String displayName, String query,
			ArrayList<SmartDialMatchPosition> matchList) {
		final int nameLength = displayName.length();
		final int queryLength = query.length();

		// Every matched query digit consumes one character of the name, so a name shorter than
		// the query can never match. An empty query never matches either.
		if (queryLength == 0 || nameLength < queryLength) {
			return false;
		}

		// The current character index in displayName
		// E.g. 3 corresponds to 'd' in "Fred Smith"
		int nameStart = 0;

		// The current character in the query we are trying to match the displayName against
		int queryStart = 0;

		// The start position of the current token we are inspecting
		int tokenStart = 0;

		// The number of non-alphabetic characters we've encountered so far in the current match.
		// E.g. if we've currently matched 3733764849 to (Fred Smith W)illiam, then the
		// separatorCount should be 2. This allows us to correctly calculate offsets for the match
		// positions
		int separatorCount = 0;

		// Best initial-style match found so far; only used if no contiguous match is found
		ArrayList<SmartDialMatchPosition> partial = new ArrayList<SmartDialMatchPosition>();

		// Keep going until we reach the end of displayName or have matched the whole query
		while (nameStart < nameLength && queryStart < queryLength) {
			// Strip diacritics from accented characters if any
			final char ch = remapAccentedChars(displayName.charAt(nameStart));
			if (isLowercaseLatin(ch)) {
				// 'a' is at index 0 of the letter-to-digit table
				if (LATIN_LETTERS_TO_DIGITS[ch - 'a'] != query.charAt(queryStart)) {
					// Failed to match the current character in the query.
					//
					// Case 1: the very first query digit failed, or
					// Case 2: earlier digits matched and the failure happened in the middle of
					// a token. Either way the current token cannot match, so skip past it and
					// the separator that follows it.
					//
					// Case 3: earlier digits matched and the failure happened on the first
					// letter of a token (the previous character is a separator). This token has
					// not been tried from its start yet, so stay on it and retry the query.
					if (queryStart == 0 || isLowercaseLatin(
							remapAccentedChars(displayName.charAt(nameStart - 1)))) {
						while (nameStart < nameLength && isLowercaseLatin(
								remapAccentedChars(displayName.charAt(nameStart)))) {
							nameStart++;
						}
						nameStart++;
					}
					// Restart the query at the token that begins at nameStart
					queryStart = 0;
					separatorCount = 0;
					tokenStart = nameStart;
				} else {
					if (queryStart == queryLength - 1) {
						// The last query digit just matched, so the whole query matched in one
						// run starting at tokenStart. As much as possible, we prioritize a full
						// token match over a sub token one so if we find a full token match, we
						// can return right away
						matchList.add(new SmartDialMatchPosition(
								tokenStart, queryLength + tokenStart + separatorCount));
						return true;
					} else if (ALLOW_INITIAL_MATCH && queryStart < INITIAL_LENGTH_LIMIT) {
						// we matched the first character.
						// branch off and see if we can find another match with the remaining
						// characters in the query string and the remaining tokens

						// find the next separator in the display name
						int j;
						for (j = nameStart; j < nameLength; j++) {
							if (!isLowercaseLatin(remapAccentedChars(displayName.charAt(j)))) {
								break;
							}
						}
						// this means there is at least one character left after the separator
						if (j < nameLength - 1) {
							final String remainder = displayName.substring(j + 1);
							final ArrayList<SmartDialMatchPosition> partialTemp =
									Lists.newArrayList();
							if (matchesCombination(
									remainder, query.substring(queryStart + 1), partialTemp)) {
								// The positions in partialTemp are relative to remainder;
								// advanceMatchPositions is presumably what shifts them by j + 1
								// so that they index into displayName again.
								SmartDialMatchPosition.advanceMatchPositions(partialTemp, j + 1);
								// The initial itself is highlighted as a one-character match
								partialTemp.add(0,
										new SmartDialMatchPosition(nameStart, nameStart + 1));
								// we found a partial token match, store the data in a
								// temp buffer and return it if we end up not finding a full
								// token match. A partial match found at a later token replaces
								// an earlier one.
								partial = partialTemp;
							}
						}
					}
					// we matched the current character in the name against one in the query,
					// continue and see if the rest of the characters match
					nameStart++;
					queryStart++;
				}
			} else {
				// found a separator, we skip this character and continue to the next one
				nameStart++;
				if (queryStart == 0) {
					// This means we found a separator before the start of a token,
					// so we should increment the token's start position to reflect its true
					// start position
					tokenStart = nameStart;
				} else {
					// Otherwise this separator was found in the middle of a token being matched,
					// so increase the separator count
					separatorCount++;
				}
			}
		}

		// if we have no complete match at this point, then we attempt to fall back to the partial
		// token match(if any). If we don't allow initial matching (ALLOW_INITIAL_MATCH = false)
		// then partial will always be empty.
		if (!partial.isEmpty()) {
			matchList.addAll(partial);
			return true;
		}
		return false;
	}

	/**
	 * Tells whether the character lies in the range 'a' to 'z'. The name matching logic treats
	 * every character outside that range as a separator.
	 *
	 * @param ch The character to test
	 * @return true if {@code ch} is a lowercase latin letter, false otherwise
	 */
	private boolean isLowercaseLatin(char ch) {
		final boolean outsideRange = ch < 'a' || ch > 'z';
		return !outsideRange;
	}

	/**
	 * Checks a display name against the query this matcher was constructed with. The positions
	 * recorded by an earlier call are discarded first, and the positions of this call are
	 * collected into the list that {@link #getMatchPositions()} copies.
	 *
	 * @param displayName The display name to test
	 * @return The result of matchesCombination for the display name and the stored query
	 */
	public boolean matches(String displayName) {
		// Start from an empty list so positions from an earlier call never leak into this one
		mMatchPositions.clear();
		final boolean matched = matchesCombination(displayName, mQuery, mMatchPositions);
		return matched;
	}

	/**
	 * Returns the match positions currently held by this matcher.
	 *
	 * @return A new list holding the same position objects (a shallow copy), so the caller can
	 *         use it without worrying about the list changing on a later call to
	 *         {@link #matches(String)}
	 */
	public ArrayList<SmartDialMatchPosition> getMatchPositions() {
		final ArrayList<SmartDialMatchPosition> snapshot =
				new ArrayList<SmartDialMatchPosition>();
		snapshot.addAll(mMatchPositions);
		return snapshot;
	}

	/**
	 * Returns the query this matcher was constructed with.
	 *
	 * @return The query string passed to the constructor
	 */
	public String getQuery() {
		return this.mQuery;
	}
	}