// android/PhoneNumberUtils.cpp - platform/external/sqlite - Git at Google

 /*
  * Copyright 2009, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <string.h>

 namespace android {

 /* Generated by the following Python script. Values of country calling codes
    are from http://en.wikipedia.org/wiki/List_of_country_calling_codes

 #!/usr/bin/python
 import sys
 ccc_set_2digits = set([0, 1, 7,
                        20, 27, 28, 30, 31, 32, 33, 34, 36, 39, 40, 43, 44, 45,
                        46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61,
                        62, 63, 64, 65, 66, 81, 82, 83, 84, 86, 89, 90, 91, 92,
                        93, 94, 95, 98])

 ONE_LINE_NUM = 10

 for i in xrange(100):
   if i % ONE_LINE_NUM == 0:
     sys.stdout.write('    ')
   if i in ccc_set_2digits:
     included = 'true'
   else:
     included = 'false'
   sys.stdout.write(included + ',')
   if ((i + 1) % ONE_LINE_NUM) == 0:
     sys.stdout.write('\n')
   else:
     sys.stdout.write(' ')
 */
 // Read-only lookup table produced by the generator script in the comment
 // above: entry [n] is true when n is a member of ccc_set_2digits, i.e. when n
 // is treated as a complete one- or two-digit country calling code.
 // Declared const because nothing is expected to modify it after generation.
 static const bool two_length_country_code_map[100] = {
     true, true, false, false, false, false, false, true, false, false,      //  0 -  9
     false, false, false, false, false, false, false, false, false, false,   // 10 - 19
     true, false, false, false, false, false, false, true, true, false,      // 20 - 29
     true, true, true, true, true, false, true, false, false, true,          // 30 - 39
     true, false, false, true, true, true, true, true, true, true,           // 40 - 49
     false, true, true, true, true, true, true, true, true, false,           // 50 - 59
     true, true, true, true, true, true, true, false, false, false,          // 60 - 69
     false, false, false, false, false, false, false, false, false, false,   // 70 - 79
     false, true, true, true, true, false, true, false, false, true,         // 80 - 89
     true, true, true, true, true, true, false, false, true, false,          // 90 - 99
 };

 #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))

 /**
  * Tells whether "ccc_candidate" is flagged as a country calling code in
  * two_length_country_code_map.
  *
  * Only strictly positive candidates that fit inside the table are looked up;
  * zero, negative values and values past the end of the table yield false.
  */
 static bool isCountryCallingCode(int ccc_candidate) {
     if (ccc_candidate <= 0) {
         return false;
     }
     if (ccc_candidate >= (int)ARRAY_SIZE(two_length_country_code_map)) {
         return false;
     }
     return two_length_country_code_map[ccc_candidate];
 }

 /**
  * Converts an ISO-LATIN decimal digit character to its numeric value.
  *
  * Returns 0..9 when "ch" is one of the characters '0'..'9'.
  * Returns -1 for every other character.
  */
 static int tryGetISODigit (char ch)
 {
     const bool is_digit = (ch >= '0') && (ch <= '9');
     return is_digit ? (ch - '0') : -1;
 }

 /**
  * True if "ch" is a character that carries meaning in a phone number:
  * ISO-LATIN digits 0-9, '*', '#' or '+'. Everything else is a separator.
  */
 static bool isNonSeparator(char ch)
 {
     switch (ch) {
         case '*':
         case '#':
         case '+':
             return true;
         default:
             return ch >= '0' && ch <= '9';
     }
 }

 /**
  * Tries to drop a trunk prefix from the front of "str".
  *
  * The function simply treats the first digit as the trunk prefix; the real
  * trunk prefix differs from country to country.
  *
  * e.g.
  * "+79161234567" equals "89161234567" (Russian trunk digit is 8)
  * "+33123456789" equals "0123456789" (French trunk digit is 0)
  *
  * Leading separator characters are skipped. If the first non-separator
  * character is a digit, "*new_ptr" (when "new_ptr" is non-NULL) receives the
  * position right after that digit, "*new_len" (when "new_len" is non-NULL)
  * receives the number of characters left from there, and true is returned.
  * Returns false, leaving the outputs untouched, when "str" holds no
  * non-separator character within "len" or when the first one is not a digit.
  */
 static bool tryGetTrunkPrefixOmittedStr(const char *str, size_t len,
                                         const char **new_ptr, size_t *new_len)
 {
     // Step over leading separators.
     size_t pos = 0;
     while (pos < len && !isNonSeparator(str[pos])) {
         pos++;
     }

     // Nothing meaningful found, or the number starts with '*', '#' or '+'.
     if (pos == len || tryGetISODigit(str[pos]) < 0) {
         return false;
     }

     const size_t after_prefix = pos + 1;
     if (new_ptr != NULL) {
         *new_ptr = str + after_prefix;
     }
     if (new_len != NULL) {
         *new_len = len - after_prefix;
     }
     return true;
 }

 /*
  * Scans the beginning of "str" (at most "len" characters) for an
  * international prefix followed by a country calling code.
  *
  * Accepted shapes (separator characters, i.e. anything isNonSeparator()
  * rejects, are ignored in every state):
  *   "+"   <ccc>
  *   "00"  <ccc>
  *   "011" <ccc>
  *   "166"          special case for calls from Thailand to the US; reported
  *                  as country calling code 66
  *
  * <ccc> is read digit by digit. Reading stops as soon as the digits collected
  * so far are accepted by isCountryCallingCode(), or once the value reaches
  * 100. Country calling codes of length 3 (like Morocco: +212) are therefore
  * not validated, assuming the leading digits are enough to compare two phone
  * numbers. The first digit of <ccc> must be 1-9; later digits may be 0 so
  * that codes such as +20, +30 or +350 can be recognised.
  *
  * Returns the country calling code, or -1 when "str" does not start with
  * one. On success "*new_ptr" (when "new_ptr" is non-NULL) is set to the
  * character following the code and "*new_len" (when "new_len" is non-NULL)
  * to the number of characters remaining from there. On failure the outputs
  * are left untouched.
  */
 static int tryGetCountryCallingCode(const char *str, size_t len,
                                     const char **new_ptr, size_t *new_len)
 {
     // State machine; what has been read so far in each state:
     //   0: nothing    1: "+"      2: "0"     3: "00"    4: "01"
     //   5: "011"      6: prefix and one code digit
     //   7: prefix and two code digits
     //   8: "1"        9: "16"

     int state = 0;
     int ccc = 0;
     for (size_t i = 0 ; i < len ; i++ ) {
         char ch = str[i];
         switch (state) {
             case 0:
                 if      (ch == '+') state = 1;
                 else if (ch == '0') state = 2;
                 else if (ch == '1') state = 8;
                 else if (isNonSeparator(ch)) return -1;
                 break;

             case 2:
                 if      (ch == '0') state = 3;
                 else if (ch == '1') state = 4;
                 else if (isNonSeparator(ch)) return -1;
                 break;

             case 4:
                 if      (ch == '1') state = 5;
                 else if (isNonSeparator(ch)) return -1;
                 break;

             case 1:
             case 3:
             case 5:
             case 6:
             case 7:
                 {
                     // States 1, 3 and 5 expect the first code digit.
                     const bool first_digit =
                             (state == 1 || state == 3 || state == 5);
                     int ret = tryGetISODigit(ch);
                     // No country calling code starts with 0, but 0 is a
                     // legitimate second or third digit (+20, +30, +350...).
                     // Rejecting it here would make those codes unreachable.
                     if (ret > 0 || (ret == 0 && !first_digit)) {
                         ccc = ccc * 10 + ret;
                         if (ccc >= 100 || isCountryCallingCode(ccc)) {
                             if (new_ptr != NULL) {
                                 *new_ptr = str + i + 1;
                             }
                             if (new_len != NULL) {
                                 *new_len = len - (i + 1);
                             }
                             return ccc;
                         }
                         if (first_digit) {
                             state = 6;
                         } else {
                             state++;
                         }
                     } else if (isNonSeparator(ch)) {
                         return -1;
                     }
                 }
                 break;
             case 8:
                 if (ch == '6') state = 9;
                 else if (isNonSeparator(ch)) return -1;
                 break;
             case 9:
                 if (ch == '6') {
                     if (new_ptr != NULL) {
                         *new_ptr = str + i + 1;
                     }
                     if (new_len != NULL) {
                         *new_len = len - (i + 1);
                     }
                     return 66;
                 } else if (isNonSeparator(ch)) {
                     // "16" followed by anything but '6' is not the "166"
                     // prefix. Without this, scanning would continue and any
                     // later '6' (e.g. in "1-650-555-6234") would be taken
                     // for the end of "166".
                     return -1;
                 }
                 break;
             default:
                 return -1;
         }
     }

     return -1;
 }

 /**
  * Return true if the prefix of "ch", i.e. the characters at indexes i, i-1,
  * ..., 0, is "ignorable". Here, "ignorable" means that the prefix consists
  * of separator characters and at most one digit. That one digit is assumed
  * to be a trunk prefix. A negative "i" denotes an empty prefix, which is
  * ignorable.
  */
 static bool checkPrefixIsIgnorable(const char* ch, int i) {
     int digits_seen = 0;
     for (int pos = i; pos >= 0; pos--) {
         const char c = ch[pos];
         if (!isNonSeparator(c)) {
             // Separators never matter.
             continue;
         }
         if (tryGetISODigit(c) < 0) {
             // Trunk prefix is a digit, not "*", "#"...
             return false;
         }
         digits_seen++;
         if (digits_seen > 1) {
             // More than one digit appeared, meaning that the two numbers
             // being compared are different.
             return false;
         }
     }

     return true;
 }

 /**
  * Compare phone numbers a and b, return true if they're identical
  * enough for caller ID purposes.
  *
  * Assume NULL as 0-length string.
  *
  * Outline:
  *  1. Try to strip a country calling code from the front of each number.
  *     Two different codes mean two different numbers.
  *  2. If exactly one number carries a country calling code, drop the first
  *     digit of the other one, assuming it is a trunk prefix.
  *  3. Compare the non-separator characters of what remains, walking from
  *     the end of both strings towards the front.
  *  4. Decide whether the characters left over at the front of either string
  *     can be ignored.
  *
  * Detailed information:
  * Currently (as of 2009-06-12), we cannot depend on the locale given from the
  * OS. For example, current Android does not accept "en_JP", meaning
  * "the display language is English but the phone should be in Japan", but
  * en_US, es_US, etc. So we cannot identify which digit is valid trunk prefix
  * in the country where the phone is used. More specifically, "880-1234-1234"
  * is not valid phone number in Japan since the trunk prefix in Japan is not 8
  * but 0 (correct number should be "080-1234-1234"), while Russian trunk prefix
  * is 8. Also, we cannot know whether the country where users live has trunk
  * prefix itself. So, we cannot determine whether "+81-80-1234-1234" is NOT
  * same as "880-1234-1234" (while "+81-80-1234-1234" is same as "080-1234-1234"
  * and we can determine "880-1234-1234" is different from "080-1234-1234").
  *
  * In the future, we should handle trunk prefix more correctly, but as of now,
  * we just ignore it...
  */
 bool phone_number_compare(const char* a, const char* b)
 {
     size_t len_a = 0;
     size_t len_b = 0;
     if (a == NULL) {
         a = "";
     } else {
         len_a = strlen(a);
     }
     if (b == NULL) {
         b = "";
     } else {
         len_b = strlen(b);
     }

     // tmp_* stay NULL / unchanged unless a prefix is actually stripped.
     const char* tmp_a = NULL;
     const char* tmp_b = NULL;
     size_t tmp_len_a = len_a;
     size_t tmp_len_b = len_b;

     int ccc_a = tryGetCountryCallingCode(a, len_a, &tmp_a, &tmp_len_a);
     int ccc_b = tryGetCountryCallingCode(b, len_b, &tmp_b, &tmp_len_b);
     bool ok_to_ignore_prefix = true;
     if (ccc_a >= 0 && ccc_b >= 0) {
         if (ccc_a != ccc_b) {
             // Different Country Calling Code. Must be different phone number.
             return false;
         }
         // When both have ccc, do not ignore trunk prefix. Without this,
         // "+81123123" becomes same as "+810123123" (+81 == Japan)
         ok_to_ignore_prefix = false;
     } else if (ccc_a < 0 && ccc_b < 0) {
         // When both do not have ccc, do not ignore trunk prefix. Without this,
         // "123123" becomes same as "0123123"
         ok_to_ignore_prefix = false;
     } else {
         // Exactly one number has a ccc: drop the assumed trunk prefix from
         // the other one.
         if (ccc_a < 0) {
             tryGetTrunkPrefixOmittedStr(a, len_a, &tmp_a, &tmp_len_a);
         }
         if (ccc_b < 0) {
             tryGetTrunkPrefixOmittedStr(b, len_b, &tmp_b, &tmp_len_b);
         }
     }

     if (tmp_a != NULL) {
         a = tmp_a;
         len_a = tmp_len_a;
     }
     if (tmp_b != NULL) {
         b = tmp_b;
         len_b = tmp_len_b;
     }

     // Index of the last character of each string, -1 for an empty string.
     // The cast is applied before the subtraction so that an empty string
     // yields -1 directly instead of relying on the conversion of a wrapped
     // size_t value to int.
     int i_a = (int)len_a - 1;
     int i_b = (int)len_b - 1;

     // Walk both strings backwards, skipping separators independently and
     // comparing the meaningful characters pairwise.
     while (i_a >= 0 && i_b >= 0) {
         bool skip_compare = false;
         char ch_a = a[i_a];
         char ch_b = b[i_b];
         if (!isNonSeparator(ch_a)) {
             i_a--;
             skip_compare = true;
         }
         if (!isNonSeparator(ch_b)) {
             i_b--;
             skip_compare = true;
         }

         if (!skip_compare) {
             if (ch_a != ch_b) {
                 return false;
             }
             i_a--;
             i_b--;
         }
     }

     if (ok_to_ignore_prefix) {
         if (!checkPrefixIsIgnorable(a, i_a)) {
             return false;
         }
         if (!checkPrefixIsIgnorable(b, i_b)) {
             return false;
         }
     } else {
         // In the US, 1-650-555-1234 must be equal to 650-555-1234,
         // while 090-1234-1234 must not be equal to 90-1234-1234 in Japan.
         // This request exists just in US (with 1 trunk (NDD) prefix).
         //
         // At least, in this "rough" comparison, we should ignore the prefix
         // '1', so if the remaining non-separator character is a single '1',
         // we ignore it just once. The flag is shared by both loops, so only
         // one of the two numbers may carry that extra '1'.
         bool may_be_namp = true;
         while (i_a >= 0) {
             const char ch_a = a[i_a];
             if (isNonSeparator(ch_a)) {
                 if (may_be_namp && tryGetISODigit(ch_a) == 1) {
                     may_be_namp = false;
                 } else {
                     return false;
                 }
             }
             i_a--;
         }
         while (i_b >= 0) {
             const char ch_b = b[i_b];
             if (isNonSeparator(ch_b)) {
                 if (may_be_namp && tryGetISODigit(ch_b) == 1) {
                     may_be_namp = false;
                 } else {
                     return false;
                 }
             }
             i_b--;
         }
     }

     return true;
 }

 } // namespace android
	/*
	* Copyright 2009, The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include <string.h>

	namespace android {

	/* Generated by the following Python script. Values of country calling codes
	   are from http://en.wikipedia.org/wiki/List_of_country_calling_codes

	#!/usr/bin/python
	import sys
	ccc_set_2digits = set([0, 1, 7,
	                       20, 27, 28, 30, 31, 32, 33, 34, 36, 39, 40, 43, 44, 45,
	                       46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61,
	                       62, 63, 64, 65, 66, 81, 82, 83, 84, 86, 89, 90, 91, 92,
	                       93, 94, 95, 98])

	ONE_LINE_NUM = 10

	for i in xrange(100):
	  if i % ONE_LINE_NUM == 0:
	    sys.stdout.write('    ')
	  if i in ccc_set_2digits:
	    included = 'true'
	  else:
	    included = 'false'
	  sys.stdout.write(included + ',')
	  if ((i + 1) % ONE_LINE_NUM) == 0:
	    sys.stdout.write('\n')
	  else:
	    sys.stdout.write(' ')
	*/
	// Read-only lookup table produced by the generator script in the comment
	// above: entry [n] is true when n is a member of ccc_set_2digits.
	// Declared const because nothing is expected to modify it after generation.
	static const bool two_length_country_code_map[100] = {
	    true, true, false, false, false, false, false, true, false, false,
	    false, false, false, false, false, false, false, false, false, false,
	    true, false, false, false, false, false, false, true, true, false,
	    true, true, true, true, true, false, true, false, false, true,
	    true, false, false, true, true, true, true, true, true, true,
	    false, true, true, true, true, true, true, true, true, false,
	    true, true, true, true, true, true, true, false, false, false,
	    false, false, false, false, false, false, false, false, false, false,
	    false, true, true, true, true, false, true, false, false, true,
	    true, true, true, true, true, true, false, false, true, false,
	};

	#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))

	/**
	 * Tells whether "ccc_candidate" is flagged as a country calling code in
	 * two_length_country_code_map.
	 *
	 * Only strictly positive candidates that fit inside the table are looked
	 * up; zero, negative values and values past the end of the table yield
	 * false.
	 */
	static bool isCountryCallingCode(int ccc_candidate) {
	    const int table_size = (int)ARRAY_SIZE(two_length_country_code_map);
	    if (ccc_candidate <= 0 || ccc_candidate >= table_size) {
	        return false;
	    }
	    return two_length_country_code_map[ccc_candidate];
	}

	/**
	 * Converts an ISO-LATIN decimal digit character to its numeric value.
	 *
	 * Returns 0..9 when "ch" is one of the characters '0'..'9'.
	 * Returns -1 for every other character.
	 */
	static int tryGetISODigit (char ch)
	{
	    if (ch < '0' || ch > '9') {
	        return -1;
	    }
	    return ch - '0';
	}

	/**
	 * True if "ch" is one of the ISO-LATIN characters 0-9, '*', '#' or '+'.
	 * Every other character is considered a separator.
	 */
	static bool isNonSeparator(char ch)
	{
	    return ('0' <= ch && ch <= '9') || ch == '*' || ch == '#' || ch == '+';
	}

	/**
	 * Tries to drop a trunk prefix from the front of "str".
	 *
	 * Currently this function simply ignores the first digit, assuming it is
	 * the trunk prefix. Actually the trunk prefix is different in each country.
	 *
	 * e.g.
	 * "+79161234567" equals "89161234567" (Russian trunk digit is 8)
	 * "+33123456789" equals "0123456789" (French trunk digit is 0)
	 *
	 * Leading separator characters are skipped. If the first non-separator
	 * character is a digit, "*new_ptr" (when "new_ptr" is non-NULL) receives
	 * the position right after that digit, "*new_len" (when "new_len" is
	 * non-NULL) receives the number of characters left from there, and true is
	 * returned. Otherwise false is returned and the outputs are left untouched.
	 */
	static bool tryGetTrunkPrefixOmittedStr(const char *str, size_t len,
	                                        const char **new_ptr, size_t *new_len)
	{
	    for (size_t i = 0 ; i < len ; i++) {
	        char ch = str[i];
	        if (tryGetISODigit(ch) >= 0) {
	            // First digit found: everything after it is the result.
	            if (new_ptr != NULL) {
	                *new_ptr = str + i + 1;
	            }
	            if (new_len != NULL) {
	                *new_len = len - (i + 1);
	            }
	            return true;
	        } else if (isNonSeparator(ch)) {
	            // The number starts with '*', '#' or '+': no trunk prefix.
	            return false;
	        }
	    }

	    return false;
	}

	/*
	 * Scans the beginning of "str" (at most "len" characters) for an
	 * international prefix followed by a country calling code.
	 *
	 * Accepted shapes (separator characters, i.e. anything isNonSeparator()
	 * rejects, are ignored in every state):
	 *   "+"   <ccc>
	 *   "00"  <ccc>
	 *   "011" <ccc>
	 *   "166"          special case for calls from Thailand to the US;
	 *                  reported as country calling code 66
	 *
	 * <ccc> is read digit by digit. Reading stops as soon as the digits
	 * collected so far are accepted by isCountryCallingCode(), or once the
	 * value reaches 100. Country calling codes of length 3 (like Morocco: +212)
	 * are therefore not validated, assuming the leading digits are enough to
	 * compare two phone numbers. The first digit of <ccc> must be 1-9; later
	 * digits may be 0 so that codes such as +20, +30 or +350 can be recognised.
	 *
	 * Returns the country calling code, or -1 when "str" does not start with
	 * one. On success "*new_ptr" (when "new_ptr" is non-NULL) is set to the
	 * character following the code and "*new_len" (when "new_len" is non-NULL)
	 * to the number of characters remaining from there.
	 */
	static int tryGetCountryCallingCode(const char *str, size_t len,
	                                    const char **new_ptr, size_t *new_len)
	{
	    // State machine; what has been read so far in each state:
	    //   0: nothing    1: "+"      2: "0"     3: "00"    4: "01"
	    //   5: "011"      6: prefix and one code digit
	    //   7: prefix and two code digits
	    //   8: "1"        9: "16"

	    int state = 0;
	    int ccc = 0;
	    for (size_t i = 0 ; i < len ; i++ ) {
	        char ch = str[i];
	        switch (state) {
	            case 0:
	                if      (ch == '+') state = 1;
	                else if (ch == '0') state = 2;
	                else if (ch == '1') state = 8;
	                else if (isNonSeparator(ch)) return -1;
	                break;

	            case 2:
	                if      (ch == '0') state = 3;
	                else if (ch == '1') state = 4;
	                else if (isNonSeparator(ch)) return -1;
	                break;

	            case 4:
	                if      (ch == '1') state = 5;
	                else if (isNonSeparator(ch)) return -1;
	                break;

	            case 1:
	            case 3:
	            case 5:
	            case 6:
	            case 7:
	                {
	                    // States 1, 3 and 5 expect the first code digit.
	                    const bool first_digit =
	                            (state == 1 || state == 3 || state == 5);
	                    int ret = tryGetISODigit(ch);
	                    // No country calling code starts with 0, but 0 is a
	                    // legitimate second or third digit (+20, +30, +350...).
	                    if (ret > 0 || (ret == 0 && !first_digit)) {
	                        ccc = ccc * 10 + ret;
	                        if (ccc >= 100 || isCountryCallingCode(ccc)) {
	                            if (new_ptr != NULL) {
	                                *new_ptr = str + i + 1;
	                            }
	                            if (new_len != NULL) {
	                                *new_len = len - (i + 1);
	                            }
	                            return ccc;
	                        }
	                        if (first_digit) {
	                            state = 6;
	                        } else {
	                            state++;
	                        }
	                    } else if (isNonSeparator(ch)) {
	                        return -1;
	                    }
	                }
	                break;
	            case 8:
	                if (ch == '6') state = 9;
	                else if (isNonSeparator(ch)) return -1;
	                break;
	            case 9:
	                if (ch == '6') {
	                    if (new_ptr != NULL) {
	                        *new_ptr = str + i + 1;
	                    }
	                    if (new_len != NULL) {
	                        *new_len = len - (i + 1);
	                    }
	                    return 66;
	                } else if (isNonSeparator(ch)) {
	                    // "16" followed by anything but '6' is not "166".
	                    // Without this, a later '6' (e.g. in
	                    // "1-650-555-6234") would be taken for its end.
	                    return -1;
	                }
	                break;
	            default:
	                return -1;
	        }
	    }

	    return -1;
	}

	/**
	 * Return true if the prefix of "ch", i.e. the characters at indexes i,
	 * i-1, ..., 0, is "ignorable". Here, "ignorable" means that the prefix
	 * consists of separator characters and at most one digit. That one digit
	 * is assumed to be a trunk prefix. A negative "i" denotes an empty prefix,
	 * which is ignorable.
	 */
	static bool checkPrefixIsIgnorable(const char* ch, int i) {
	    int digits_seen = 0;
	    for (int pos = i; pos >= 0; pos--) {
	        const char c = ch[pos];
	        if (!isNonSeparator(c)) {
	            // Separators never matter.
	            continue;
	        }
	        if (tryGetISODigit(c) < 0) {
	            // Trunk prefix is a digit, not "*", "#"...
	            return false;
	        }
	        digits_seen++;
	        if (digits_seen > 1) {
	            // More than one digit appeared, meaning that the two numbers
	            // being compared are different.
	            return false;
	        }
	    }

	    return true;
	}

	/**
	 * Compare phone numbers a and b, return true if they're identical
	 * enough for caller ID purposes.
	 *
	 * Assume NULL as 0-length string.
	 *
	 * Detailed information:
	 * Currently (as of 2009-06-12), we cannot depend on the locale given from
	 * the OS. For example, current Android does not accept "en_JP", meaning
	 * "the display language is English but the phone should be in Japan", but
	 * en_US, es_US, etc. So we cannot identify which digit is valid trunk
	 * prefix in the country where the phone is used. More specifically,
	 * "880-1234-1234" is not valid phone number in Japan since the trunk prefix
	 * in Japan is not 8 but 0 (correct number should be "080-1234-1234"), while
	 * Russian trunk prefix is 8. Also, we cannot know whether the country where
	 * users live has trunk prefix itself. So, we cannot determine whether
	 * "+81-80-1234-1234" is NOT same as "880-1234-1234" (while
	 * "+81-80-1234-1234" is same as "080-1234-1234" and we can determine
	 * "880-1234-1234" is different from "080-1234-1234").
	 *
	 * In the future, we should handle trunk prefix more correctly, but as of
	 * now, we just ignore it...
	 */
	bool phone_number_compare(const char* a, const char* b)
	{
	    size_t len_a = 0;
	    size_t len_b = 0;
	    if (a == NULL) {
	        a = "";
	    } else {
	        len_a = strlen(a);
	    }
	    if (b == NULL) {
	        b = "";
	    } else {
	        len_b = strlen(b);
	    }

	    // tmp_* stay NULL / unchanged unless a prefix is actually stripped.
	    const char* tmp_a = NULL;
	    const char* tmp_b = NULL;
	    size_t tmp_len_a = len_a;
	    size_t tmp_len_b = len_b;

	    int ccc_a = tryGetCountryCallingCode(a, len_a, &tmp_a, &tmp_len_a);
	    int ccc_b = tryGetCountryCallingCode(b, len_b, &tmp_b, &tmp_len_b);
	    bool ok_to_ignore_prefix = true;
	    if (ccc_a >= 0 && ccc_b >= 0) {
	        if (ccc_a != ccc_b) {
	            // Different Country Calling Code. Must be different phone
	            // number.
	            return false;
	        }
	        // When both have ccc, do not ignore trunk prefix. Without this,
	        // "+81123123" becomes same as "+810123123" (+81 == Japan)
	        ok_to_ignore_prefix = false;
	    } else if (ccc_a < 0 && ccc_b < 0) {
	        // When both do not have ccc, do not ignore trunk prefix. Without
	        // this, "123123" becomes same as "0123123"
	        ok_to_ignore_prefix = false;
	    } else {
	        // Exactly one number has a ccc: drop the assumed trunk prefix from
	        // the other one.
	        if (ccc_a < 0) {
	            tryGetTrunkPrefixOmittedStr(a, len_a, &tmp_a, &tmp_len_a);
	        }
	        if (ccc_b < 0) {
	            tryGetTrunkPrefixOmittedStr(b, len_b, &tmp_b, &tmp_len_b);
	        }
	    }

	    if (tmp_a != NULL) {
	        a = tmp_a;
	        len_a = tmp_len_a;
	    }
	    if (tmp_b != NULL) {
	        b = tmp_b;
	        len_b = tmp_len_b;
	    }

	    // Index of the last character of each string, -1 for an empty string.
	    // The cast is applied before the subtraction so that an empty string
	    // yields -1 directly instead of relying on the conversion of a wrapped
	    // size_t value to int.
	    int i_a = (int)len_a - 1;
	    int i_b = (int)len_b - 1;

	    // Walk both strings backwards, skipping separators independently and
	    // comparing the meaningful characters pairwise.
	    while (i_a >= 0 && i_b >= 0) {
	        bool skip_compare = false;
	        char ch_a = a[i_a];
	        char ch_b = b[i_b];
	        if (!isNonSeparator(ch_a)) {
	            i_a--;
	            skip_compare = true;
	        }
	        if (!isNonSeparator(ch_b)) {
	            i_b--;
	            skip_compare = true;
	        }

	        if (!skip_compare) {
	            if (ch_a != ch_b) {
	                return false;
	            }
	            i_a--;
	            i_b--;
	        }
	    }

	    if (ok_to_ignore_prefix) {
	        if (!checkPrefixIsIgnorable(a, i_a)) {
	            return false;
	        }
	        if (!checkPrefixIsIgnorable(b, i_b)) {
	            return false;
	        }
	    } else {
	        // In the US, 1-650-555-1234 must be equal to 650-555-1234,
	        // while 090-1234-1234 must not be equal to 90-1234-1234 in Japan.
	        // This request exists just in US (with 1 trunk (NDD) prefix).
	        //
	        // At least, in this "rough" comparison, we should ignore the prefix
	        // '1', so if the remaining non-separator character is a single '1',
	        // we ignore it just once. The flag is shared by both loops, so only
	        // one of the two numbers may carry that extra '1'.
	        bool may_be_namp = true;
	        while (i_a >= 0) {
	            const char ch_a = a[i_a];
	            if (isNonSeparator(ch_a)) {
	                if (may_be_namp && tryGetISODigit(ch_a) == 1) {
	                    may_be_namp = false;
	                } else {
	                    return false;
	                }
	            }
	            i_a--;
	        }
	        while (i_b >= 0) {
	            const char ch_b = b[i_b];
	            if (isNonSeparator(ch_b)) {
	                if (may_be_namp && tryGetISODigit(ch_b) == 1) {
	                    may_be_namp = false;
	                } else {
	                    return false;
	                }
	            }
	            i_b--;
	        }
	    }

	    return true;
	}

	} // namespace android