Fix Smart dialing OOM for extremely long contacts
Names with an extremely large number of name tokens were
using exponentially increasing amounts of memory since we have
to insert entries for all possible initial name combinations.
Added a check in the trie insertion algorithm to only add initial
matches for the 1st 2, and the last 2 name tokens.
This change only affects search by initial matches for names exceeding
4 name tokens. Full token search for names of all lengths should still
work. E.g. "frank", "enstein" would still match "Dr Frank En Stein DDS".
However initial matching would be limited to the first 2 and last 2 tokens.
So "dfsd" or "fsd" would work, but "fes" or "fed" would not.
Also fixes a bug caused by integer overflow when calculating thresholds
for bucketing frequently used contacts
Bug 8737986
Change-Id: I804184368b78fe2fa407667ed83874d839c28115
diff --git a/src/com/android/dialer/dialpad/SmartDialCache.java b/src/com/android/dialer/dialpad/SmartDialCache.java
index 7a11b15..51e900a 100644
--- a/src/com/android/dialer/dialpad/SmartDialCache.java
+++ b/src/com/android/dialer/dialpad/SmartDialCache.java
@@ -111,10 +111,10 @@
public static final int PHONE_DISPLAY_NAME = 6;
// Current contacts - those contacted within the last 3 days (in milliseconds)
- final static long LAST_TIME_USED_CURRENT_MS = 3 * 24 * 60 * 60 * 1000;
+ final static long LAST_TIME_USED_CURRENT_MS = 3L * 24 * 60 * 60 * 1000;
// Recent contacts - those contacted within the last 30 days (in milliseconds)
- final static long LAST_TIME_USED_RECENT_MS = 30 * 24 * 60 * 60 * 1000;
+ final static long LAST_TIME_USED_RECENT_MS = 30L * 24 * 60 * 60 * 1000;
final static String TIME_SINCE_LAST_USED_MS =
"(? - " + Data.LAST_TIME_USED + ")";
diff --git a/src/com/android/dialer/dialpad/SmartDialTrie.java b/src/com/android/dialer/dialpad/SmartDialTrie.java
index 0de28a5..04ff64c 100644
--- a/src/com/android/dialer/dialpad/SmartDialTrie.java
+++ b/src/com/android/dialer/dialpad/SmartDialTrie.java
@@ -38,6 +38,15 @@
* that uses NANP numbers.</p>
*/
public class SmartDialTrie {
+ @VisibleForTesting
+ static class ParseInfo {
+ byte[] indexes;
+ int nthFirstTokenPos;
+ int nthLastTokenPos;
+ }
+
+ private static final int LAST_TOKENS_FOR_INITIALS = 2;
+ private static final int FIRST_TOKENS_FOR_INITIALS = 2;
final Node mRoot = new Node();
private int mSize = 0;
private final char[] mCharacterMap;
@@ -118,8 +127,9 @@
public void put(ContactNumber contact) {
// Preconvert the display name into a byte array containing indexes to avoid having to
// remap each character over multiple passes
- putForPrefix(contact, mRoot, toByteArray(contact.displayName), 0,
- contact.displayName.length(), true);
+ final ParseInfo info = parseToIndexes(contact.displayName, FIRST_TOKENS_FOR_INITIALS,
+ LAST_TOKENS_FOR_INITIALS);
+ putForPrefix(contact, mRoot, info, 0, true);
// We don't need to do the same for phone numbers since we only make one pass over them.
// Strip the calling code from the phone number here
if (!TextUtils.isEmpty(contact.phoneNumber)) {
@@ -215,22 +225,52 @@
return new int[0];
}
+ /**
+ * Converts the given characters into a byte array of index and returns it together with offset
+ * information in a {@link ParseInfo} data structure.
+ * @param chars Characters to convert into indexes
+ * @param firstNTokens The first n tokens we want the offset for
+ * @param lastNTokens The last n tokens we want the offset for
+ */
@VisibleForTesting
- /* package */ byte[] toByteArray(CharSequence chars) {
+ ParseInfo parseToIndexes(CharSequence chars, int firstNTokens, int lastNTokens) {
final int length = chars.length();
final byte[] result = new byte[length];
+ final ArrayList<Integer> offSets = new ArrayList<Integer>();
char c;
+ int tokenCount = 0;
+ boolean atSeparator = true;
for (int i = 0; i < length; i++) {
c = SmartDialNameMatcher.remapAccentedChars(chars.charAt(i));
- if (c >= 'a' && c <= 'z') {
- result[i] = (byte) (mCharacterMap[c - 'a'] - '0');
- } else if (c >= '0' && c <= '9') {
- result[i] = (byte) (c - '0');
+ if (c >= 'a' && c <= 'z' || c >= '0' && c <= '9') {
+ if (atSeparator) {
+ tokenCount++;
+ }
+ atSeparator = false;
+ if (c <= '9') {
+ // 0-9
+ result[i] = (byte) (c - '0');
+ } else {
+ // a-z
+ result[i] = (byte) (mCharacterMap[c - 'a'] - '0');
+ }
} else {
+ // Found the last character of the current token
+ if (!atSeparator) {
+ offSets.add(i);
+ }
result[i] = -1;
+ atSeparator = true;
}
}
- return result;
+
+ final ParseInfo info = new ParseInfo();
+ info.indexes = result;
+ info.nthFirstTokenPos = offSets.size() >= firstNTokens ? offSets.get(firstNTokens - 1) :
+ length - 1;
+ info.nthLastTokenPos = offSets.size() >= lastNTokens ? offSets.get(offSets.size() -
+ lastNTokens) : 0;
+ return info;
}
/**
@@ -248,7 +288,7 @@
*
* @param contact - Contact to add to the trie
* @param phoneNumber - Phone number of the contact
- * @param offset - The nth character of the phone number to start from
+ * @param offSet - The nth character of the phone number to start from
*/
private void putNumber(ContactNumber contact, String phoneNumber, int offSet) {
Preconditions.checkArgument(offSet < phoneNumber.length());
@@ -268,24 +308,29 @@
* Place an contact into the trie using at the provided node using the provided prefix. Uses as
* the input prefix a byte array instead of a CharSequence, as we will be traversing the array
* multiple times and it is more efficient to pre-convert the prefix into indexes before hand.
+ * Adds initial matches for the first token, and the last N tokens in the name.
*
* @param contact Contact to put
* @param root Root node to use as the starting point
- * @param prefix Sequence of bytes which represent the index at
+ * @param parseInfo ParseInfo data structure containing the converted byte array, as well as
+ * token offsets that determine which tokens should have entries added for initial
+ * search
* @param start - Starting index of the byte array
- * @param end - Last index(not inclusive) of the byte array
- * @param isFullName If true, prefix will be treated as a full name and recursive calls to add
- * initial matches as well as name token matches into the trie will be made.
+ * @param isFullName If true, prefix will be treated as a full name and everytime a new name
+ * token is encountered, the rest of the name segment is added into the trie.
*/
- private void putForPrefix(ContactNumber contact, Node root, byte[] prefix, int start, int end,
+ private void putForPrefix(ContactNumber contact, Node root, ParseInfo info, int start,
boolean isFullName) {
+ final boolean addInitialMatches = (start >= info.nthLastTokenPos ||
+ start <= info.nthFirstTokenPos);
+ final byte[] indexes = info.indexes;
Node current = root;
Node initialNode = root;
- final int length = end;
+ final int length = indexes.length;
boolean atSeparator = true;
byte index;
for (int i = start; i < length; i++) {
- index = prefix[i];
+ index = indexes[i];
if (index > -1) {
if (atSeparator) {
atSeparator = false;
@@ -293,14 +338,14 @@
// the root node
if (initialNode != this.mRoot) {
if (isFullName) {
- putForPrefix(contact, this.mRoot, prefix, i, prefix.length, false);
+ putForPrefix(contact, this.mRoot, info, i, false);
}
- if (initialNode != root) {
- putForPrefix(contact, initialNode, prefix, i, prefix.length,
- false);
+ if (addInitialMatches &&
+ (i >= info.nthLastTokenPos || i <= info.nthFirstTokenPos) &&
+ initialNode != root) {
+ putForPrefix(contact, initialNode, info, i, false);
}
}
-
// Set initial node to the node indexed by the first character of the current
// prefix
if (initialNode == root) {
diff --git a/tests/src/com/android/dialer/dialpad/SmartDialTrieTest.java b/tests/src/com/android/dialer/dialpad/SmartDialTrieTest.java
index cd87b66..876e00d 100644
--- a/tests/src/com/android/dialer/dialpad/SmartDialTrieTest.java
+++ b/tests/src/com/android/dialer/dialpad/SmartDialTrieTest.java
@@ -19,6 +19,7 @@
import static com.android.dialer.dialpad.SmartDialCache.ContactNumber;
import com.android.dialer.dialpad.SmartDialTrie.Node;
+import com.android.dialer.dialpad.SmartDialTrie.ParseInfo;
import android.test.suitebuilder.annotation.SmallTest;
@@ -129,47 +130,91 @@
public void testPutForInitialMatchesCombinations() {
final SmartDialTrie trie = new SmartDialTrie();
final ContactNumber alphabet = new ContactNumber(0, "abc def ghi jkl mno pqrs tuv wxyz",
- "1", "1", 2);
+ "12345678", "1", 2);
trie.put(alphabet);
- // 255 (2^8 - 1) combinations for the name, and 1 for the number
- assertEquals(256, trie.numEntries());
-
- // Test every single combination of the leading initials of each name token by generating
- // strings consisting of all combinations of the digits 2-9.
- final StringBuilder sb = new StringBuilder();
-
- // Create an array of bit positions 0b10000000, 0b01000000, 0b00100000, ...
- final int[] pos = new int[8];
- for (int i = 2; i <= 9; i++) {
- pos[i - 2] = 1 << (9 - i);
- }
- for (int i = 1; i < 256; i++) {
- sb.setLength(0);
- // If the bit at nth position is set to true, then add the nth initial to the target
- // string
- for (int j = 2; j <= 9; j++) {
- if ((i & pos[j - 2]) > 0) {
- sb.append(j);
- }
- }
- assertTrue(checkContains(trie, alphabet, sb.toString()));
- }
-
+ assertEquals(20, trie.numEntries());
+ // 8 name entries (abcdefghi..., defghi..., ...)
+ assertTrue(checkContains(trie, alphabet, "22233344455566677778889999"));
+ assertTrue(checkContains(trie, alphabet, "33344455566677778889999"));
+ assertTrue(checkContains(trie, alphabet, "44455566677778889999"));
+ assertTrue(checkContains(trie, alphabet, "55566677778889999"));
+ assertTrue(checkContains(trie, alphabet, "66677778889999"));
+ assertTrue(checkContains(trie, alphabet, "77778889999"));
+ assertTrue(checkContains(trie, alphabet, "8889999"));
+ assertTrue(checkContains(trie, alphabet, "9999"));
+ // 1 number entry
+ assertTrue(checkContains(trie, alphabet, "12345678"));
+ // 11 initial entries (adtw, adw, adt, ad, atw, at, aw, dt, dw, dtw, tw)
+ // 4c2(6) + 4c3(4) + 4c4(1)
+ assertTrue(checkContains(trie, alphabet, "2389999"));
+ assertTrue(checkContains(trie, alphabet, "239999"));
+ assertTrue(checkContains(trie, alphabet, "23888"));
+ assertTrue(checkContains(trie, alphabet, "2333"));
+ assertTrue(checkContains(trie, alphabet, "289999"));
+ assertTrue(checkContains(trie, alphabet, "2888"));
+ assertTrue(checkContains(trie, alphabet, "29999"));
+ assertTrue(checkContains(trie, alphabet, "3888"));
+ assertTrue(checkContains(trie, alphabet, "39999"));
+ assertTrue(checkContains(trie, alphabet, "389999"));
+ assertTrue(checkContains(trie, alphabet, "89999"));
}
- public void testSeparators() {
- SmartDialTrie trie = new SmartDialTrie();
- String name = "Mcdonald Jamie-Cullum";
- byte[] bytes = trie.toByteArray(name);
+ public void testCheckLongToken() {
+ final SmartDialTrie trie = new SmartDialTrie();
+ final ContactNumber alphabet = new ContactNumber(0, " aaaa bbbb cccc dddd eeee ffff gggg" +
+ " hhhh iiii jjjj kkkk llll mmmm nnnn oooo pppp qqqq rrrr ssss tttt uuuu vvvv " +
+ " wwww xxxx yyyy zzzz", "1", "1", 2);
+ // Make sure the check to prevent overly long tokens from causing an OOM kicks in
+ trie.put(alphabet);
+ assertTrue(checkContains(trie, alphabet, "2222"));
+ // 26 name entries (aaaabbbbcccc...., bbbbccccdddd...., ccccdddd...)
+ // 1 number entry
+ // 11 initial entries 4c2(6) + 4c3(4) + 4c4(1)
+ assertEquals(38, trie.numEntries());
+
+ final ContactNumber alphabet2 = new ContactNumber(0, "aaaabbbbccccddddeeeeffffgggg" +
+ "hhhhiiiijjjjkkkkllllmmmmnnnnooooppppqqqqrrrrssssttttuuuuvvvvwwwwxxxxyyyyzzzz",
+ "1", "1", 2);
+ trie.put(alphabet2);
+ // added one name, and one number entry
+ assertEquals(40, trie.numEntries());
+ }
+
+ public void testParseInfo() {
+ final SmartDialTrie trie = new SmartDialTrie();
+ final String name = "Mcdonald Jamie-Cullum";
+ final ParseInfo info = trie.parseToIndexes(name, 2, 2);
// Make sure the dash is correctly converted to a separator character
for (int i = 0; i < name.length(); i++) {
// separators at position 8 and 12
if (i == 8 || i == 14) {
- assertTrue(bytes[i] == -1);
+ assertTrue(info.indexes[i] == -1);
} else {
- assertFalse(bytes[i] == -1);
+ assertFalse(info.indexes[i] == -1);
}
}
+ assertEquals(14, info.nthFirstTokenPos);
+ assertEquals(8, info.nthLastTokenPos);
+
+ final String name2 = "aaa bbb ccc ddd eee fff ggg hhh iii jjj kkk";
+ final ParseInfo info2 = trie.parseToIndexes(name2, 2, 2);
+ assertEquals(7, info2.nthFirstTokenPos);
+ assertEquals(35, info2.nthLastTokenPos);
+
+ final String name3 = "this is- a,test name";
+ final ParseInfo info3 = trie.parseToIndexes(name3, 3, 3);
+ assertEquals(11, info3.nthFirstTokenPos);
+ assertEquals(8, info3.nthLastTokenPos);
+
+ final String name4 = "M c-Donald James";
+ final ParseInfo info4 = trie.parseToIndexes(name4, 2, 3);
+ assertEquals(3, info4.nthFirstTokenPos);
+ assertEquals(1, info4.nthLastTokenPos);
+
+ final String name5 = " Aa'Bb c dddd e'e";
+ final ParseInfo info5 = trie.parseToIndexes(name5, 4, 4);
+ assertEquals(21, info5.nthFirstTokenPos);
+ assertEquals(8, info5.nthLastTokenPos);
}
public void testAccentedCharacters() {