Find proper snippet in multi-line and large result.

Previous CL removed the use ContactsContract.snippetize(). This method
found the proper line in a multi-line results and then snippetized the
line if it was too long.

This CL adds that functionality back without using snippetize(). This
new code is faster because it only does text intensive processing when
the text is long. The previous snippetize method did processing for all
strings.  In addition, the old method iterated over the snippet multiple
times (i.e. first with contains, then subsequently tokenizes, etc).  This
change re-uses the initial search results and remembers the search
information so multiple iterations are not necessary.  In addition,
tokenizing has been optimized based on the match.  We only need to
chop off excess content rather than tokenizing the whole string.

This CL also makes snippet more friend to landscape mode. Previously,
the number of snippet tokens shown was hard-coded to 2 on each side.
Furthermore, this caused longer tokens to exceed screen real estate. Now
the number of tokens shown are determined more accurately by character
count versus real estate.  In landscape mode, this allows us to show
much more of the search result.

Finally, fixed a highlight problem when the search query contained
punctuation. For example, a search for {.ben.} would not match {ben}.
This Cl resolves that issue by cleaning the search query.

Bug: 5929143
Change-Id: I5c368e6de8b34ca912f86926f862a02f95199fa7
diff --git a/res/values-land/integers.xml b/res/values-land/integers.xml
index 44d1d5a..e0d1e05 100644
--- a/res/values-land/integers.xml
+++ b/res/values-land/integers.xml
@@ -16,4 +16,7 @@
   -->
 <resources>
     <integer name="contact_tile_column_count_in_favorites">4</integer>
+
+    <!-- The number of characters in the snippet before we need to tokenize and ellipse. -->
+    <integer name="snippet_length_before_tokenize">60</integer>
 </resources>
diff --git a/res/values-sw580dp-land/integers.xml b/res/values-sw580dp-land/integers.xml
index 03f5ed7..bd9eb0e 100644
--- a/res/values-sw580dp-land/integers.xml
+++ b/res/values-sw580dp-land/integers.xml
@@ -16,4 +16,7 @@
   -->
 <resources>
     <integer name="contact_tile_column_count_in_favorites">3</integer>
+
+    <!-- The number of characters in the snippet before we need to tokenize and ellipse. -->
+    <integer name="snippet_length_before_tokenize">20</integer>
 </resources>
diff --git a/res/values-sw580dp/integers.xml b/res/values-sw580dp/integers.xml
index 03f5ed7..5b1c92b 100644
--- a/res/values-sw580dp/integers.xml
+++ b/res/values-sw580dp/integers.xml
@@ -16,4 +16,9 @@
   -->
 <resources>
     <integer name="contact_tile_column_count_in_favorites">3</integer>
+
+    <!-- The number of characters in the snippet before we need to tokenize and ellipse. -->
+    <!-- Yikes, there is less space on a tablet!  This makes the search experience rather
+         poor. Another reason to get rid of the exist tablet layout. -->
+    <integer name="snippet_length_before_tokenize">15</integer>
 </resources>
diff --git a/res/values-sw680dp-land/integers.xml b/res/values-sw680dp-land/integers.xml
index 44d1d5a..a83cdff 100644
--- a/res/values-sw680dp-land/integers.xml
+++ b/res/values-sw680dp-land/integers.xml
@@ -16,4 +16,7 @@
   -->
 <resources>
     <integer name="contact_tile_column_count_in_favorites">4</integer>
+
+    <!-- The number of characters in the snippet before we need to tokenize and ellipse. -->
+    <integer name="snippet_length_before_tokenize">30</integer>
 </resources>
diff --git a/res/values-sw680dp/integers.xml b/res/values-sw680dp/integers.xml
index 3e47757..930adb3 100644
--- a/res/values-sw680dp/integers.xml
+++ b/res/values-sw680dp/integers.xml
@@ -16,4 +16,7 @@
   -->
 <resources>
     <integer name="contact_tile_column_count_in_favorites">2</integer>
+
+    <!-- The number of characters in the snippet before we need to tokenize and ellipse. -->
+    <integer name="snippet_length_before_tokenize">20</integer>
 </resources>
diff --git a/res/values/integers.xml b/res/values/integers.xml
index 73e9aee..f3b54a4 100644
--- a/res/values/integers.xml
+++ b/res/values/integers.xml
@@ -19,4 +19,7 @@
 
     <!--  Determines the number of columns in a ContactTileRow in the favorites tab -->
     <integer name="contact_tile_column_count_in_favorites">2</integer>
+
+    <!-- The number of characters in the snippet before we need to tokenize and ellipse. -->
+    <integer name="snippet_length_before_tokenize">30</integer>
 </resources>
diff --git a/src/com/android/contacts/common/list/ContactEntryListAdapter.java b/src/com/android/contacts/common/list/ContactEntryListAdapter.java
index 9ebf9a2..fb9c73a 100644
--- a/src/com/android/contacts/common/list/ContactEntryListAdapter.java
+++ b/src/com/android/contacts/common/list/ContactEntryListAdapter.java
@@ -35,6 +35,7 @@
 
 import com.android.contacts.common.ContactPhotoManager;
 import com.android.contacts.common.R;
+import com.android.contacts.common.util.SearchUtil;
 
 import java.util.HashSet;
 
@@ -225,7 +226,8 @@
         if (TextUtils.isEmpty(queryString)) {
             mUpperCaseQueryString = null;
         } else {
-            mUpperCaseQueryString = queryString.toUpperCase();
+            mUpperCaseQueryString = SearchUtil
+                    .cleanStartAndEndOfSearchQuery(queryString.toUpperCase()) ;
         }
     }
 
diff --git a/src/com/android/contacts/common/list/ContactListItemView.java b/src/com/android/contacts/common/list/ContactListItemView.java
index 8354fd9..89a67d5 100644
--- a/src/com/android/contacts/common/list/ContactListItemView.java
+++ b/src/com/android/contacts/common/list/ContactListItemView.java
@@ -48,6 +48,7 @@
 import com.android.contacts.common.ContactStatusUtil;
 import com.android.contacts.common.R;
 import com.android.contacts.common.format.PrefixHighlighter;
+import com.android.contacts.common.util.SearchUtil;
 import com.google.common.collect.Lists;
 
 import java.util.ArrayList;
@@ -1148,33 +1149,21 @@
         }
 
         String snippet = cursor.getString(summarySnippetColumnIndex);
+
         // Do client side snippeting if provider didn't do it
         final Bundle extras = cursor.getExtras();
         if (extras.getBoolean(ContactsContract.DEFERRED_SNIPPETING)) {
 
             final String query = extras.getString(ContactsContract.DEFERRED_SNIPPETING_QUERY);
-            if (TextUtils.isEmpty(snippet) || TextUtils.isEmpty(query)) {
-                snippet = null;
-            } else {
-                // If the name matches, don't show snippets.
-                int displayNameIndex = cursor.getColumnIndex(Contacts.DISPLAY_NAME);
-                if (displayNameIndex >= 0) {
 
-                    final String displayName = cursor.getString(displayNameIndex);
-                    if (TextUtils.isEmpty(displayName)) {
-                        snippet = null;
-                    } else {
-                        final String lowerQuery = query.toLowerCase();
-                        final String lowerDisplayName = displayName.toLowerCase();
-                        final List<String> nameTokens = split(lowerDisplayName);
-                        for (String nameToken : nameTokens) {
-                            if (nameToken.startsWith(lowerQuery)) {
-                                snippet = null;
-                            }
-                        }
-                    }
-                }
+            String displayName = null;
+            int displayNameIndex = cursor.getColumnIndex(Contacts.DISPLAY_NAME);
+            if (displayNameIndex >= 0) {
+                displayName = cursor.getString(displayNameIndex);
             }
+
+            snippet = updateSnippet(snippet, query, displayName);
+
         } else {
             if (snippet != null) {
                 int from = 0;
@@ -1207,9 +1196,116 @@
                 }
             }
         }
+
         setSnippet(snippet);
     }
 
+    /**
+     * Used for deferred snippets from the database. The contents come back as large strings which
+     * need to be extracted for display.
+     *
+     * @param snippet The snippet from the database.
+     * @param query The search query substring.
+     * @param displayName The contact display name.
+     * @return The proper snippet to display.
+     */
+    private String updateSnippet(String snippet, String query, String displayName) {
+
+        if (TextUtils.isEmpty(snippet) || TextUtils.isEmpty(query)) {
+            return null;
+        }
+        query = SearchUtil.cleanStartAndEndOfSearchQuery(query.toLowerCase());
+
+        // If the display name already contains the query term, return empty - snippets should
+        // not be needed in that case.
+        if (!TextUtils.isEmpty(displayName)) {
+            final String lowerDisplayName = displayName.toLowerCase();
+            final List<String> nameTokens = split(lowerDisplayName);
+            for (String nameToken : nameTokens) {
+                if (nameToken.startsWith(query)) {
+                    return null;
+                }
+            }
+        }
+
+        // The snippet may contain multiple data lines.
+        // Show the first line that matches the query.
+        final SearchUtil.MatchedLine matched = SearchUtil.findMatchingLine(snippet, query);
+
+        if (matched != null && matched.line != null) {
+            // Tokenize for long strings since the match may be at the end of it.
+            // Skip this part for short strings since the whole string will be displayed.
+            // Most contact strings are short so the snippetize method will be called infrequently.
+            final int lengthThreshold = getResources().getInteger(
+                    R.integer.snippet_length_before_tokenize);
+            if (matched.line.length() > lengthThreshold) {
+                return snippetize(matched.line, matched.startIndex, lengthThreshold);
+            } else {
+                return matched.line;
+            }
+        }
+
+        // No match found.
+        return null;
+    }
+
+    private String snippetize(String line, int matchIndex, int maxLength) {
+        // Show up to maxLength characters. But we only show full tokens so show the last full token
+        // up to maxLength characters. So as many starting tokens as possible before trying ending
+        // tokens.
+        int remainingLength = maxLength;
+        int tempRemainingLength = remainingLength;
+
+        // Start the end token after the matched query.
+        int index = matchIndex;
+        int endTokenIndex = index;
+
+        // Find the match token first.
+        while (index < line.length()) {
+            if (!Character.isLetterOrDigit(line.charAt(index))) {
+                endTokenIndex = index;
+                remainingLength = tempRemainingLength;
+                break;
+            }
+            tempRemainingLength--;
+            index++;
+        }
+
+        // Find as much content before the match.
+        index = matchIndex - 1;
+        tempRemainingLength = remainingLength;
+        int startTokenIndex = matchIndex;
+        while (index > -1 && tempRemainingLength > 0) {
+            if (!Character.isLetterOrDigit(line.charAt(index))) {
+                startTokenIndex = index;
+                remainingLength = tempRemainingLength;
+            }
+            tempRemainingLength--;
+            index--;
+        }
+
+        index = endTokenIndex;
+        tempRemainingLength = remainingLength;
+        // Find remaining content at after match.
+        while (index < line.length() && tempRemainingLength > 0) {
+            if (!Character.isLetterOrDigit(line.charAt(index))) {
+                endTokenIndex = index;
+            }
+            tempRemainingLength--;
+            index++;
+        }
+        // Append ellipse if there is content before or after.
+        final StringBuilder sb = new StringBuilder();
+        if (startTokenIndex > 0) {
+            sb.append("...");
+        }
+        sb.append(line.substring(startTokenIndex, endTokenIndex));
+        if (endTokenIndex < line.length()) {
+            sb.append("...");
+        }
+        return sb.toString();
+    }
+
     private static final Pattern SPLIT_PATTERN = Pattern.compile(
             "([\\w-\\.]+)@((?:[\\w]+\\.)+)([a-zA-Z]{2,4})|[\\w]+");
 
diff --git a/src/com/android/contacts/common/util/SearchUtil.java b/src/com/android/contacts/common/util/SearchUtil.java
new file mode 100644
index 0000000..b3428ff
--- /dev/null
+++ b/src/com/android/contacts/common/util/SearchUtil.java
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.contacts.common.util;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Methods related to search.
+ */
+public class SearchUtil {
+
+    public static class MatchedLine {
+
+        public int startIndex = -1;
+        public String line;
+
+        @Override
+        public String toString() {
+            return "MatchedLine{" +
+                    "line='" + line + '\'' +
+                    ", startIndex=" + startIndex +
+                    '}';
+        }
+    }
+
+    /**
+     * Given a string with lines delimited with '\n', finds the matching line to the given
+     * substring.
+     *
+     * @param contents The string to search.
+     * @param substring The substring to search for.
+     * @return A MatchedLine object containing the matching line and the startIndex of the substring
+     * match within that line.
+     */
+    public static MatchedLine findMatchingLine(String contents, String substring) {
+        final MatchedLine matched = new MatchedLine();
+
+        // Snippet may contain multiple lines separated by "\n".
+        // Locate the lines of the content that contain the substring.
+        final int index = SearchUtil.contains(contents, substring);
+        if (index != -1) {
+            // Match found.  Find the corresponding line.
+            int start = index - 1;
+            while (start > -1) {
+                if (contents.charAt(start) == '\n') {
+                    break;
+                }
+                start--;
+            }
+            int end = index + 1;
+            while (end < contents.length()) {
+                if (contents.charAt(end) == '\n') {
+                    break;
+                }
+                end++;
+            }
+            matched.line = contents.substring(start + 1, end);
+            matched.startIndex = index - (start + 1);
+        }
+        return matched;
+    }
+
+    /**
+     * Similar to String.contains() with two main differences:
+     * <p>
+     * 1) Only searches token prefixes.  A token is defined as any combination of letters or
+     * numbers.
+     * <p>
+     * 2) Returns the starting index where the substring is found.
+     *
+     * @param value The string to search.
+     * @param substring The substring to look for.
+     * @return The starting index where the substring is found. {@literal -1} if substring is not
+     *         found in value.
+     */
+    @VisibleForTesting
+    static int contains(String value, String substring) {
+        if (value.length() < substring.length()) {
+            return -1;
+        }
+
+        // i18n support
+        // Generate the code points for the substring once.
+        // There will be a maximum of substring.length code points.  But may be fewer.
+        // Since the array length is not an accurate size, we need to keep a separate variable.
+        final int[] substringCodePoints = new int[substring.length()];
+        int substringLength = 0;  // may not equal substring.length()!!
+        for (int i = 0; i < substring.length(); ) {
+            final int codePoint = Character.codePointAt(substring, i);
+            substringCodePoints[substringLength] = codePoint;
+            substringLength++;
+            i += Character.charCount(codePoint);
+        }
+
+        int valueCodePoint = 0;
+        for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
+            valueCodePoint = value.codePointAt(i);
+
+            int numMatch = 0;
+
+            // As an optimization, we lower here instead of making the parent do it.
+            //int substringCodePoint = substring.codePointAt(numMatch);
+            int valueCpIndex = 0;
+            int cp = Character.toLowerCase(valueCodePoint);
+            while (numMatch < substringLength && cp == substringCodePoints[numMatch]) {
+                numMatch++;
+                if (numMatch == substringLength) {
+                    // Must exit loop here otherwise code below may cause IndexOutOfBoundsException.
+                    // When query string matches content exactly.
+                    return i;
+                }
+                valueCpIndex += Character.charCount(cp);
+                cp = Character.toLowerCase(value.codePointAt(i + valueCpIndex));
+
+            }
+
+        }
+        return -1;
+    }
+
+    /**
+     * Find the start of the next token.  A token is composed of letters and numbers. Any other
+     * character are considered delimiters.
+     *
+     * @param line The string to search for the next token.
+     * @param startIndex The index to start searching.  0 based indexing.
+     * @return The index for the start of the next token.  line.length() if next token not found.
+     */
+    @VisibleForTesting
+    static int findNextTokenStart(String line, int startIndex) {
+        int index = startIndex;
+
+        // If already in token, eat remainder of token.
+        while (index <= line.length()) {
+            if (index == line.length()) {
+                // No more tokens.
+                return index;
+            }
+            final int codePoint = line.codePointAt(index);
+            if (!Character.isLetterOrDigit(codePoint)) {
+                break;
+            }
+            index += Character.charCount(codePoint);
+        }
+
+        // Out of token, eat all consecutive delimiters.
+        while (index <= line.length()) {
+            if (index == line.length()) {
+                return index;
+            }
+            final int codePoint = line.codePointAt(index);
+            if (Character.isLetterOrDigit(codePoint)) {
+                break;
+            }
+            index += Character.charCount(codePoint);
+        }
+
+        return index;
+    }
+
+    /**
+     * Anything other than letter and numbers are considered delimiters.  Remove start and end
+     * delimiters since they are not relevant to search.
+     *
+     * @param query The query string to clean.
+     * @return The cleaned query. Empty string if all characters are cleaned out.
+     */
+    public static String cleanStartAndEndOfSearchQuery(String query) {
+        int start = 0;
+        while (start < query.length()) {
+            int codePoint = query.codePointAt(start);
+            if (Character.isLetterOrDigit(codePoint)) {
+                break;
+            }
+            start += Character.charCount(codePoint);
+        }
+
+        if (start == query.length()) {
+            // All characters are delimiters.
+            return "";
+        }
+
+        int end = query.length() - 1;
+        while (end > -1) {
+            if (Character.isLowSurrogate(query.charAt(end))) {
+                // Assume valid i18n string.  There should be a matching high surrogate before it.
+                end--;
+            }
+            int codePoint = query.codePointAt(end);
+            if (Character.isLetterOrDigit(codePoint)) {
+                break;
+            }
+            end--;
+        }
+
+        // end is a letter or digit.
+        return query.substring(start, end + 1);
+    }
+}
diff --git a/tests/src/com/android/contacts/common/util/SearchUtilTest.java b/tests/src/com/android/contacts/common/util/SearchUtilTest.java
new file mode 100644
index 0000000..e03e871
--- /dev/null
+++ b/tests/src/com/android/contacts/common/util/SearchUtilTest.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.contacts.common.util;
+
+import android.test.suitebuilder.annotation.SmallTest;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for {@link SearchUtil}.
+ */
+@SmallTest
+public class SearchUtilTest extends TestCase {
+
+    public void testFindMatchingLine() {
+        final String actual = "this is a long test string.\nWith potentially many lines.\n" +
+                "test@google.com\nhello\nblah\n'leading punc";
+
+        SearchUtil.MatchedLine matched = SearchUtil.findMatchingLine(actual, "poten");
+        assertEquals("With potentially many lines.", matched.line);
+        assertEquals(5, matched.startIndex);
+
+        // Full line match.
+        matched = SearchUtil.findMatchingLine(actual, "hello");
+        assertEquals("hello", matched.line);
+        assertEquals(0, matched.startIndex);
+
+        // First line match
+        matched = SearchUtil.findMatchingLine(actual, "this");
+        assertEquals("this is a long test string.", matched.line);
+        assertEquals(0, matched.startIndex);
+
+        // Last line match
+        matched = SearchUtil.findMatchingLine(actual, "punc");
+        assertEquals("'leading punc", matched.line);
+        assertEquals(9, matched.startIndex);
+    }
+
+    public void testContains() {
+        final String actual = "this is a long test string.\nWith potentially many lines.\n" +
+                "test@google.com\nhello\nblah\n'leading punc";
+        assertEquals(0, SearchUtil.contains(actual, "this"));
+        assertEquals(10, SearchUtil.contains(actual, "lon"));
+
+        assertEquals(1, SearchUtil.contains("'leading punc", "lead"));
+        assertEquals(9, SearchUtil.contains("'leading punc", "punc"));
+
+    }
+
+    public void testContainsNotFound() {
+        final String actual = "this is a long test string.\nWith potentially many lines.\n" +
+                "test@google.com\nhello\nblah\n'leading punc";
+
+        // Non-prefix
+        assertEquals(-1, SearchUtil.contains(actual, "ith"));
+        assertEquals(-1, SearchUtil.contains(actual, "ing"));
+
+        // Complete misses
+        assertEquals(-1, SearchUtil.contains(actual, "thisx"));
+        assertEquals(-1, SearchUtil.contains(actual, "manyx"));
+        assertEquals(-1, SearchUtil.contains(actual, "hellox"));
+    }
+
+    public void testFindNextTokenStart() {
+        final String actual = "....hello.kitty";
+        //                     012345678901234
+
+        // Find first token.
+        assertEquals(4, SearchUtil.findNextTokenStart(actual, 0));
+        assertEquals(4, SearchUtil.findNextTokenStart(actual, 1));
+        assertEquals(4, SearchUtil.findNextTokenStart(actual, 2));
+        assertEquals(4, SearchUtil.findNextTokenStart(actual, 3));
+
+        // Find second token.
+        assertEquals(10, SearchUtil.findNextTokenStart(actual, 4));
+        assertEquals(10, SearchUtil.findNextTokenStart(actual, 5));
+        assertEquals(10, SearchUtil.findNextTokenStart(actual, 6));
+        assertEquals(10, SearchUtil.findNextTokenStart(actual, 7));
+        assertEquals(10, SearchUtil.findNextTokenStart(actual, 8));
+        assertEquals(10, SearchUtil.findNextTokenStart(actual, 9));
+
+        // No token.
+        assertEquals(actual.length(), SearchUtil.findNextTokenStart(actual, 10));
+        assertEquals(actual.length(), SearchUtil.findNextTokenStart(actual, 11));
+        assertEquals(actual.length(), SearchUtil.findNextTokenStart(actual, 12));
+        assertEquals(actual.length(), SearchUtil.findNextTokenStart(actual, 13));
+        assertEquals(actual.length(), SearchUtil.findNextTokenStart(actual, 14));
+    }
+
+    public void testCleanStartAndEndOfSearchQuery() {
+        assertEquals("test", SearchUtil.cleanStartAndEndOfSearchQuery("...test..."));
+        assertEquals("test", SearchUtil.cleanStartAndEndOfSearchQuery(" test "));
+        assertEquals("test", SearchUtil.cleanStartAndEndOfSearchQuery(" ||test"));
+        assertEquals("test", SearchUtil.cleanStartAndEndOfSearchQuery("test.."));
+    }
+
+}