Fix name matching bug with non space separators Bug: 8435819 Change-Id: I69c32207d123e5da4ae5421a5fe83ffeee4e5070

commit: 69ae7985ce26709b0acd9c376c751b8fb0ba4528 [log] [tgz]
author: Yorke Lee <yorkelee@google.com> Thu Mar 21 18:22:18 2013 -0700
committer: Yorke Lee <yorkelee@google.com> Mon Apr 08 11:56:03 2013 -0700
tree: 7f8e4a06e6bd13fd386d104c63d089aeaef64c68
parent: f5bc56a398b20eed03df82f5dacd86aebc35d954 [diff]
diff --git a/src/com/android/dialer/dialpad/SmartDialNameMatcher.java b/src/com/android/dialer/dialpad/SmartDialNameMatcher.java
index 381e747..f805abf 100644
--- a/src/com/android/dialer/dialpad/SmartDialNameMatcher.java
+++ b/src/com/android/dialer/dialpad/SmartDialNameMatcher.java

@@ -66,6 +66,8 @@
      * This gives us a way to map characters containing accents/diacritics to their
      * alphabetic equivalents. The unidecode library can be found at:
      * http://pypi.python.org/pypi/Unidecode/0.04.1
+     *
+     * Also remaps all upper case latin characters to their lower case equivalents.
      */
     public static char remapAccentedChars(char c) {
         switch (c) {
@@ -471,7 +473,12 @@
      * This function iterates through each token in the display name, trying to match the query
      * to the numeric equivalent of the token.
      *
-     * A token is defined as a range in the display name delimited by whitespace. For example,
+     * A token is defined as a range in the display name delimited by characters that have no
+     * latin alphabet equivalents (e.g. spaces - ' ', periods - '.', underscores - '_' or chinese
+     * characters - '王'). Transliteration from non-latin characters to latin characters will be
+     * done on a best-effort basis - e.g. 'Ü' - 'u'.
+     *
+     * For example,
      * the display name "Phillips Thomas Jr" contains three tokens: "phillips", "thomas", and "jr".
      *
      * A match must begin at the start of a token.
@@ -520,25 +527,19 @@
         int seperatorCount = 0;
 
         ArrayList<SmartDialMatchPosition> partial = new ArrayList<SmartDialMatchPosition>();
-
         // Keep going until we reach the end of displayName
         while (nameStart < nameLength && queryStart < queryLength) {
             char ch = displayName.charAt(nameStart);
             // Strip diacritics from accented characters if any
             ch = remapAccentedChars(ch);
-            if ((ch >= 'A') && (ch <= 'Z')) {
-                // Simply change the ascii code to the lower case version instead of using
-                // toLowerCase for efficiency
-                ch += 32;
-            }
-            if ((ch >= 'a') && (ch <= 'z')) {
+            if (isLowercaseLatin(ch)) {
                 // a starts at index 0
                 if (LATIN_LETTERS_TO_DIGITS[ch - 'a'] != query.charAt(queryStart)) {
                     // we did not find a match
                     queryStart = 0;
                     seperatorCount = 0;
                     while (nameStart < nameLength &&
-                            !Character.isWhitespace(displayName.charAt(nameStart))) {
+                            isLowercaseLatin(remapAccentedChars(displayName.charAt(nameStart)))) {
                         nameStart++;
                     }
                     nameStart++;
@@ -555,12 +556,14 @@
                         // we matched the first character.
                         // branch off and see if we can find another match with the remaining
                         // characters in the query string and the remaining tokens
-                        //find the next space in the query string
-                        int j = nameStart;
-                        while (j < nameLength && displayName.charAt(j) != ' ') {
-                            j++;
+                        // find the next separator in the display name
+                        int j;
+                        for (j = nameStart; j < nameLength; j++) {
+                            if (!isLowercaseLatin(remapAccentedChars(displayName.charAt(j)))) {
+                                break;
+                            }
                         }
-                        // this means there is at least one character left after the space
+                        // this means there is at least one character left after the separator
                         if (j < nameLength - 1) {
                             final String remainder = displayName.substring(j + 1);
                             final ArrayList<SmartDialMatchPosition> partialTemp =
@@ -609,6 +612,13 @@
         return false;
     }
 
+    /**
+     * Returns true if ch is in 'a'..'z', i.e. a lowercase latin letter and not a separator.
+     */
+    private boolean isLowercaseLatin(char ch) {
+        return ch >= 'a' && ch <= 'z';
+    }
+
     public boolean matches(String displayName) {
         mMatchPositions.clear();
         return matchesCombination(displayName, mQuery, mMatchPositions);

diff --git a/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java b/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java
index 8b7ee03..08939b4 100644
--- a/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java
+++ b/tests/src/com/android/dialer/dialpad/SmartDialNameMatcherTest.java

@@ -80,8 +80,19 @@
         checkMatches("William        John   Smith", "5764", true, 15, 16, 22, 25);
     }
 
-    // TODO: Do we want to make these pass anymore?
-    @Suppress
+    public void testMatches_InitialWithSeparator() {
+        // wjs matches (W)illiam (J)ohn-(S)mith
+        checkMatches("William John-Smith", "957", true, 0, 1, 8, 9, 13, 14);
+        // wjoshe matches (W)illiam (J)ohn-(O'She)a
+        checkMatches("William John-O'Shea", "956743", true, 0, 1, 8, 9, 13, 18);
+        // wjohn matches (W)illiam-(John) Smith
+        checkMatches("William-John Smith", "95646", true, 0, 1, 8, 12);
+        // jsmi matches William (J)ohn-(Smi)th
+        checkMatches("William John-Smith", "5764", true, 8, 9, 13, 16);
+        // make sure repeated spaces and repeated '-' separators don't mess things up
+        checkMatches("William        John---Smith", "5764", true, 15, 16, 22, 25);
+    }
+
     public void testMatches_repeatedSeparators() {
         // Simple match for single token
         checkMatches("John,,,,,Doe", "5646", true, 0, 4);
@@ -91,6 +102,15 @@
         checkMatches("John,,,,,Doe", "363", true, 9, 12);
     }
 
+    public void testMatches_LatinMix() {
+        // Latin + Chinese characters: lwang should match the initial L and then "Wang"
+        checkMatches("Lee王力Wang宏", "59264", true, 0, 1, 5, 9);
+        // Latin + Japanese characters: abcdefghijkl is expected as one range from index 1 to 16
+        checkMatches("千Abcd佳智Efgh佳IJKL", "222333444555", true, 1, 16);
+        // Latin + Arabic characters: pjames should match the initial P and then "James"
+        checkMatches("Peterعبد الرحمنJames", "752637", true, 0, 1, 15, 20);
+    }
+
     public void testMatches_umlaut() {
         checkMatches("ÄÖÜäöü", "268268", true, 0, 6); // expected to match as a-o-u-a-o-u
     }
commit	69ae7985ce26709b0acd9c376c751b8fb0ba4528	[log] [tgz]
author	Yorke Lee <yorkelee@google.com>	Thu Mar 21 18:22:18 2013 -0700
committer	Yorke Lee <yorkelee@google.com>	Mon Apr 08 11:56:03 2013 -0700
tree	7f8e4a06e6bd13fd386d104c63d089aeaef64c68
parent	f5bc56a398b20eed03df82f5dacd86aebc35d954 [diff]