Fix contacts index labels for i18n
Switch ContactsProvider to using ICU for generation of index labels,
and remove custom KO and JA code. Add i18n test cases.
Bug:7351596
Change-Id: I7ac25add8b29ff2c6c395f04a83b279b541e4125
diff --git a/android/Android.mk b/android/Android.mk
index 151a5cb..0bb78d3 100644
--- a/android/Android.mk
+++ b/android/Android.mk
@@ -2,7 +2,6 @@
libsqlite3_android_local_src_files := \
PhoneNumberUtils.cpp \
- PhoneticStringUtils.cpp \
OldPhoneNumberUtils.cpp \
PhonebookIndex.cpp \
sqlite3_android.cpp
@@ -10,7 +9,8 @@
libsqlite3_android_c_includes := \
external/sqlite/dist \
external/icu4c/i18n \
- external/icu4c/common
+ external/icu4c/common \
+ frameworks/native/include
include $(CLEAR_VARS)
LOCAL_SRC_FILES:= $(libsqlite3_android_local_src_files)
@@ -26,24 +26,6 @@
include $(BUILD_HOST_STATIC_LIBRARY)
endif
-# Test for PhoneticStringUtils
-include $(CLEAR_VARS)
-
-LOCAL_MODULE:= libsqlite3_phonetic_string_utils_test
-
-LOCAL_CFLAGS += -Wall -Werror
-
-LOCAL_SRC_FILES := \
- PhoneticStringUtils.cpp \
- PhoneticStringUtilsTest.cpp
-
-LOCAL_MODULE_TAGS := optional
-
-LOCAL_SHARED_LIBRARIES := \
- libutils
-
-include $(BUILD_EXECUTABLE)
-
# Test for PhoneNumberUtils
#
# You can also test this in Unix, like this:
@@ -71,3 +53,28 @@
LOCAL_MODULE_TAGS := optional
include $(BUILD_EXECUTABLE)
+
+ifeq ($(WITH_HOST_DALVIK),true)
+ include $(CLEAR_VARS)
+
+ LOCAL_MODULE:= libsqlite3_phone_book_index_test
+
+ LOCAL_SRC_FILES := \
+ PhonebookIndex.cpp \
+ PhonebookIndexTest.cpp
+
+ LOCAL_C_INCLUDES := \
+ external/icu4c/i18n \
+ external/icu4c/common \
+ frameworks/native/include
+
+ LOCAL_MODULE_TAGS := optional
+
+ LOCAL_SHARED_LIBRARIES := \
+ libicui18n libicuuc
+
+ LOCAL_STATIC_LIBRARIES := \
+ libutils libcutils
+
+ include $(BUILD_HOST_EXECUTABLE)
+endif
diff --git a/android/PhonebookIndex.cpp b/android/PhonebookIndex.cpp
index 5cc26e5..68674f4 100644
--- a/android/PhonebookIndex.cpp
+++ b/android/PhonebookIndex.cpp
@@ -14,192 +14,193 @@
* limitations under the License.
*/
+#include <stdlib.h>
#include <ctype.h>
#include <string.h>
+#include <stdio.h>
+#include <unicode/alphaindex.h>
#include <unicode/ucol.h>
#include <unicode/uiter.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
+#include <unicode/uloc.h>
+#include <utils/Mutex.h>
+#include <utils/RefBase.h>
#include "PhonebookIndex.h"
-#include "PhoneticStringUtils.h"
#define MIN_OUTPUT_SIZE 6 // Minimum required size for the output buffer (in bytes)
namespace android {
-// IMPORTANT! Keep the codes below SORTED. We are doing a binary search on the array
-static UChar DEFAULT_CHAR_MAP[] = {
- 0x00C6, 'A', // AE
- 0x00DF, 'S', // Etzett
- 0x1100, 0x3131, // HANGUL LETTER KIYEOK
- 0x1101, 0x3132, // HANGUL LETTER SSANGKIYEOK
- 0x1102, 0x3134, // HANGUL LETTER NIEUN
- 0x1103, 0x3137, // HANGUL LETTER TIKEUT
- 0x1104, 0x3138, // HANGUL LETTER SSANGTIKEUT
- 0x1105, 0x3139, // HANGUL LETTER RIEUL
- 0x1106, 0x3141, // HANGUL LETTER MIEUM
- 0x1107, 0x3142, // HANGUL LETTER PIEUP
- 0x1108, 0x3143, // HANGUL LETTER SSANGPIEUP
- 0x1109, 0x3145, // HANGUL LETTER SIOS
- 0x110A, 0x3146, // HANGUL LETTER SSANGSIOS
- 0x110B, 0x3147, // HANGUL LETTER IEUNG
- 0x110C, 0x3148, // HANGUL LETTER CIEUC
- 0x110D, 0x3149, // HANGUL LETTER SSANGCIEUC
- 0x110E, 0x314A, // HANGUL LETTER CHIEUCH
- 0x110F, 0x314B, // HANGUL LETTER KHIEUKH
- 0x1110, 0x314C, // HANGUL LETTER THIEUTH
- 0x1111, 0x314D, // HANGUL LETTER PHIEUPH
- 0x1112, 0x314E, // HANGUL LETTER HIEUH
- 0x111A, 0x3140, // HANGUL LETTER RIEUL-HIEUH
- 0x1121, 0x3144, // HANGUL LETTER PIEUP-SIOS
- 0x1161, 0x314F, // HANGUL LETTER A
- 0x1162, 0x3150, // HANGUL LETTER AE
- 0x1163, 0x3151, // HANGUL LETTER YA
- 0x1164, 0x3152, // HANGUL LETTER YAE
- 0x1165, 0x3153, // HANGUL LETTER EO
- 0x1166, 0x3154, // HANGUL LETTER E
- 0x1167, 0x3155, // HANGUL LETTER YEO
- 0x1168, 0x3156, // HANGUL LETTER YE
- 0x1169, 0x3157, // HANGUL LETTER O
- 0x116A, 0x3158, // HANGUL LETTER WA
- 0x116B, 0x3159, // HANGUL LETTER WAE
- 0x116C, 0x315A, // HANGUL LETTER OE
- 0x116D, 0x315B, // HANGUL LETTER YO
- 0x116E, 0x315C, // HANGUL LETTER U
- 0x116F, 0x315D, // HANGUL LETTER WEO
- 0x1170, 0x315E, // HANGUL LETTER WE
- 0x1171, 0x315F, // HANGUL LETTER WI
- 0x1172, 0x3160, // HANGUL LETTER YU
- 0x1173, 0x3161, // HANGUL LETTER EU
- 0x1174, 0x3162, // HANGUL LETTER YI
- 0x1175, 0x3163, // HANGUL LETTER I
- 0x11AA, 0x3133, // HANGUL LETTER KIYEOK-SIOS
- 0x11AC, 0x3135, // HANGUL LETTER NIEUN-CIEUC
- 0x11AD, 0x3136, // HANGUL LETTER NIEUN-HIEUH
- 0x11B0, 0x313A, // HANGUL LETTER RIEUL-KIYEOK
- 0x11B1, 0x313B, // HANGUL LETTER RIEUL-MIEUM
- 0x11B3, 0x313D, // HANGUL LETTER RIEUL-SIOS
- 0x11B4, 0x313E, // HANGUL LETTER RIEUL-THIEUTH
- 0x11B5, 0x313F, // HANGUL LETTER RIEUL-PHIEUPH
+// Wrapper that embeds an ICU AlphabeticIndex in a RefBase object so it can be held through libutils sp<> smart pointers.
+class AlphabeticIndexRef : public RefBase {
+public:
+ AlphabeticIndexRef(const char *locale, UErrorCode &status) : // any failure is reported through |status|
+ m_index(locale, status), m_locale(NULL), m_isJapanese(false) {
+ if (U_FAILURE(status)) { // the AlphabeticIndex could not be built for |locale|
+ return;
+ }
+ m_locale = strdup(locale); // private copy, released by free() in the destructor
+ if (m_locale == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ char language[4];
+ uloc_getLanguage(locale, language, sizeof(language), &status); // language part of |locale| only
+ if (U_FAILURE(status)) {
+ return;
+ }
+ m_isJapanese = (strcmp(language, ULOC_JAPANESE) == 0);
+ }
+ virtual ~AlphabeticIndexRef() { free(m_locale); }
+// Direct access to the wrapped AlphabeticIndex.
+ AlphabeticIndex& operator*() { return m_index; }
+ AlphabeticIndex* operator->() { return &m_index; }
+// Queries about the locale this index was constructed for.
+ bool isLocale(const char *locale) const { // exact string comparison; false if either side is NULL
+ return (locale != NULL && m_locale != NULL &&
+ strcmp(m_locale, locale) == 0);
+ }
+ bool isJapanese() const { return m_isJapanese; }
+ int32_t getLabel(int32_t bucketIndex, UChar *labelBuf, int32_t labelBufSize); // label length, or -1 on error
+
+private:
+ AlphabeticIndex m_index; // the wrapped ICU index
+ char *m_locale; // strdup'ed copy of the constructor's locale string (NULL if construction failed early)
+ bool m_isJapanese; // true when the locale's language compares equal to ULOC_JAPANESE
};
-/**
- * Binary search to map an individual character to the corresponding phone book index.
- */
-static UChar map_character(UChar c, UChar * char_map, int32_t length) {
- int from = 0, to = length;
- while (from < to) {
- int m = ((to + from) >> 1) & ~0x1; // Only consider even positions
- UChar cm = char_map[m];
- if (cm == c) {
- return char_map[m + 1];
- } else if (cm < c) {
- from = m + 2;
- } else {
- to = m;
+int32_t AlphabeticIndexRef::getLabel(int32_t bucketIndex, UChar *labelBuf, // writes the label of bucket |bucketIndex| into |labelBuf|
+ int32_t labelBufSize) { // returns the label length, 0 for unlabeled buckets, -1 on any failure
+ UErrorCode status = U_ZERO_ERROR;
+ m_index.resetBucketIterator(status); // restart iteration from the first bucket
+ if (U_FAILURE(status)) {
+ return -1;
}
- }
- return 0;
+ for(int i = 0; i <= bucketIndex; ++i) { // step the iterator bucketIndex + 1 times to land on the requested bucket
+ if (!m_index.nextBucket(status) || U_FAILURE(status)) { // ran out of buckets or ICU reported an error
+ return -1;
+ }
+ }
+// Only buckets of type U_ALPHAINDEX_NORMAL carry a label that is copied out.
+ int32_t len;
+ if (m_index.getBucketLabelType() == U_ALPHAINDEX_NORMAL) {
+ len = m_index.getBucketLabel().extract(labelBuf, labelBufSize, status); // labelBufSize is passed as extract()'s destination capacity
+ if (U_FAILURE(status)) {
+ return -1;
+ }
+ } else {
+ // Use no label for underflow/inflow/overflow buckets
+ labelBuf[0] = '\0';
+ len = 0;
+ }
+ return len;
}
+static Mutex gIndexMutex;
+static sp<AlphabeticIndexRef> gIndex;
+
/**
* Returns TRUE if the character belongs to a Hanzi unicode block
*/
-static bool is_CJK(UChar c) {
- return
- (0x4e00 <= c && c <= 0x9fff) // CJK_UNIFIED_IDEOGRAPHS
- || (0x3400 <= c && c <= 0x4dbf) // CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
- || (0x3000 <= c && c <= 0x303f) // CJK_SYMBOLS_AND_PUNCTUATION
- || (0x2e80 <= c && c <= 0x2eff) // CJK_RADICALS_SUPPLEMENT
- || (0x3300 <= c && c <= 0x33ff) // CJK_COMPATIBILITY
- || (0xfe30 <= c && c <= 0xfe4f) // CJK_COMPATIBILITY_FORMS
- || (0xf900 <= c && c <= 0xfaff); // CJK_COMPATIBILITY_IDEOGRAPHS
+static bool is_CJ(UChar32 c) { // true when |c| has the Han, Hiragana or Katakana script (broader than the "Hanzi" wording of the comment above)
+ return (uscript_hasScript(c, USCRIPT_HAN) ||
+ uscript_hasScript(c, USCRIPT_HIRAGANA) ||
+ uscript_hasScript(c, USCRIPT_KATAKANA));
}
-int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
- UBool * isError)
+static bool initIndexForLocale(const char *locale) { // makes gIndex match |locale|; the only call visible here is made with gIndexMutex held
+ if (locale == NULL) {
+ return false;
+ }
+// Reuse the cached index when it was already built for this exact locale string.
+ if (gIndex != NULL && gIndex->isLocale(locale)) {
+ return true;
+ }
+// Otherwise build a replacement; gIndex is only overwritten once every step below has succeeded.
+ UErrorCode status = U_ZERO_ERROR;
+ sp<AlphabeticIndexRef> newIndex(new AlphabeticIndexRef(locale, status));
+ if (newIndex == NULL || U_FAILURE(status)) {
+ return false;
+ }
+ // Always create labels for Latin characters if not present in native set
+ (*newIndex)->addLabels("en", status); // sp::operator* yields the wrapper, whose operator-> yields the AlphabeticIndex
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ if ((*newIndex)->getBucketCount(status) <= 0 || U_FAILURE(status)) { // an index without buckets is treated as a failure
+ return false;
+ }
+
+ gIndex = newIndex; // the previously cached index, if any, loses this reference
+ return true;
+}
+
+int32_t GetPhonebookIndex(UCharIterator *iter, const char *locale, // writes the index label for the string behind |iter| into |out|
+ UChar *out, int32_t size, UBool *isError) // returns the label length (0 = no label); *isError is TRUE on failure
{
- if (size < MIN_OUTPUT_SIZE) {
- *isError = TRUE;
- return 0;
- }
-
- *isError = FALSE;
-
- // Normalize the first character to remove accents using the NFD normalization
- UErrorCode errorCode = U_ZERO_ERROR;
- int32_t len = unorm_next(iter, out, size, UNORM_NFD,
- 0 /* options */, TRUE /* normalize */, NULL, &errorCode);
- if (U_FAILURE(errorCode)) {
- *isError = TRUE;
- return 0;
- }
-
- if (len == 0) { // Empty input string
- return 0;
- }
-
- UChar c = out[0];
-
- if (!u_isalpha(c)) {
- // Digits go into a # section. Everything else goes into the empty section
- // The unicode function u_isdigit would also identify other characters as digits (arabic),
- // but if we caught them here we'd risk having the same section before and after alpha-letters
- // which might break the assumption that each section exists only once
- if (c >= '0' && c <= '9') {
- out[0] = '#';
- return 1;
+ if (size < MIN_OUTPUT_SIZE) { // reject output buffers smaller than the documented minimum
+ *isError = TRUE;
+ return 0;
}
- return 0;
- }
- c = u_toupper(c);
-
- // Check for explicitly mapped characters
- UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
- if (c_mapped != 0) {
- out[0] = c_mapped;
- return 1;
- }
-
- // Convert Kanas to Hiragana
- UChar next = len > 2 ? out[1] : 0;
- c = android::GetNormalizedCodePoint(c, next, NULL);
-
- // Traditional grouping of Hiragana characters
- if (0x3041 <= c && c <= 0x309F) {
- if (c < 0x304B) c = 0x3042; // a
- else if (c < 0x3055) c = 0x304B; // ka
- else if (c < 0x305F) c = 0x3055; // sa
- else if (c < 0x306A) c = 0x305F; // ta
- else if (c < 0x306F) c = 0x306A; // na
- else if (c < 0x307E) c = 0x306F; // ha
- else if (c < 0x3083) c = 0x307E; // ma
- else if (c < 0x3089) c = 0x3084; // ya
- else if (c < 0x308E) c = 0x3089; // ra
- else if (c < 0x3094) c = 0x308F; // wa
- else return 0; // Others are not readable
- out[0] = c;
- return 1;
- } else if (0x30A0 <= c && c <= 0x30FF) {
- // Dot, onbiki, iteration marks are not readable
- return 0;
- }
-
- if (is_CJK(c)) {
- if (strncmp(locale, "ja", 2) == 0) {
- // Japanese word meaning "misc" or "other"
- out[0] = 0x4ED6;
- return 1;
- } else {
- return 0;
+ *isError = FALSE;
+ out[0] = '\0'; // default result: empty label
+ iter->move(iter, 0, UITER_ZERO); // rewind the iterator to the start of the input
+ if (!iter->hasNext(iter)) { // Empty input string
+ return 0;
}
- }
+ UnicodeString ustr; // receives a copy of the whole input, one code point at a time
+ bool prefixIsNonNumeric = false;
+ bool prefixIsNumeric = false;
+ while (iter->hasNext(iter)) {
+ UChar32 ch = uiter_next32(iter);
+ // Ignore standard phone number separators and identify any string
+ // that otherwise starts with a number.
+ if (!prefixIsNumeric && !prefixIsNonNumeric) { // still undecided: only separators seen so far
+ if (u_isdigit(ch)) {
+ prefixIsNumeric = true;
+ } else if (!u_isspace(ch) && ch != '+' && ch != '(' &&
+ ch != ')' && ch != '.' && ch != '-' && ch != '#') {
+ prefixIsNonNumeric = true;
+ }
+ }
+ ustr.append(ch);
+ }
+ if (prefixIsNumeric) { // strings that start with a digit are labeled "#" without consulting the index
+ out[0] = '#';
+ return 1;
+ }
- out[0] = c;
- return 1;
+ Mutex::Autolock autolock(gIndexMutex); // held until return: gIndex is read and possibly replaced below
+ if (!initIndexForLocale(locale)) {
+ *isError = TRUE;
+ return 0;
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t bucketIndex = (*gIndex)->getBucketIndex(ustr, status); // bucket the full string falls into
+ if (U_FAILURE(status)) {
+ *isError = TRUE;
+ return 0;
+ }
+
+ int32_t len = gIndex->getLabel(bucketIndex, out, size); // negative result signals failure
+ if (len < 0) {
+ *isError = TRUE;
+ return 0;
+ }
+
+ // For Japanese, label unclassified CJK ideographs with
+ // Japanese word meaning "misc" or "other"
+ if (gIndex->isJapanese() && len == 0 && is_CJ(ustr.char32At(0))) { // only the first code point is inspected
+ out[0] = 0x4ED6;
+ len = 1;
+ }
+
+ return len;
}
} // namespace android
diff --git a/android/PhonebookIndexTest.cpp b/android/PhonebookIndexTest.cpp
new file mode 100644
index 0000000..2f11dbe
--- /dev/null
+++ b/android/PhonebookIndexTest.cpp
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PhonebookIndex.h"
+
+#include <unicode/unistr.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace android;
+
+class TestExecutor { // minimal test harness: runs test methods and counts how many passed
+public:
+ TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {}
+ bool DoAllTests(); // returns true when every test passed
+private:
+ void DoOneTest(void (TestExecutor::*test)());
+
+ void testGetIndex(const char *src, const char *locale,
+ int32_t expected_len, UChar *expected_value);
+ void testEnglish();
+
+ // Note: When adding a test, do not forget to add it to DoAllTests().
+
+ int m_total_count; // number of test methods run
+ int m_success_count; // number of test methods that left m_success set
+
+ bool m_success; // per-test flag: reset by DoOneTest(), cleared by a failing check
+};
+
+// Runs every test method, prints the totals, and returns true only if all of them passed.
+bool TestExecutor::DoAllTests() {
+ DoOneTest(&TestExecutor::testEnglish);
+
+ printf("Test total: %d\nSuccess: %d\nFailure: %d\n",
+ m_total_count, m_success_count, m_total_count - m_success_count);
+
+ bool success = m_total_count == m_success_count;
+ printf("\n%s\n", success ? "Success" : "Failure");
+
+ return success;
+}
+// Runs a single test method and updates the counters; a test reports failure by clearing m_success.
+void TestExecutor::DoOneTest(void (TestExecutor::*test)()) {
+ m_success = true; // assume success until the test says otherwise
+
+ (this->*test)();
+
+ ++m_total_count;
+ m_success_count += m_success ? 1 : 0;
+}
+
+#define BUFFER_SIZE 10
+// Prints a NUL-terminated string followed, in parentheses, by each of its bytes as a \xNN escape.
+static void printUTF8Str(const char *utf8_str) { // NOTE(review): no caller is visible in this file - confirm it is still needed
+ printf("%s (", utf8_str);
+ for(; *utf8_str != '\0'; ++utf8_str) {
+ printf("\\x%02hhX", *utf8_str);
+ }
+ printf(")");
+}
+// Prints |len| UChars as UTF-8 text followed, in parentheses, by the space-separated hex value of each unit.
+static void printUChars(const UChar *uc_str, int32_t len) {
+ std::string utf8_str;
+ UnicodeString(uc_str, len).toUTF8String(utf8_str);
+ printf("%s (", utf8_str.c_str());
+ for(int i=0; i<len; ++i) {
+ printf("0x%02hx%s", uc_str[i], i < (len - 1) ? " " : ""); // no separator after the last unit
+ }
+ printf(")");
+}
+// Runs GetPhonebookIndex() on UTF-8 |src| for |locale|; clears m_success unless the result is exactly |expected_len| UChars equal to |expected_value|.
+void TestExecutor::testGetIndex(
+ const char *src, const char *locale,
+ int32_t expected_len, UChar *expected_value) {
+ UBool isError = FALSE;
+// Present the UTF-8 source to the API through a UCharIterator (-1: length taken from the NUL terminator).
+ UCharIterator iter;
+ uiter_setUTF8(&iter, src, -1);
+// Output buffer; its capacity is passed as an element count because the size is used as a UChar capacity when the label is extracted.
+ UChar outBuf[BUFFER_SIZE];
+
+ int32_t len = GetPhonebookIndex(&iter, locale, outBuf, BUFFER_SIZE, &isError);
+ if (isError) {
+ printf("GetPhonebookIndex returned error (%s:%s)\n", locale, src);
+ m_success = false;
+ } else if (len != expected_len) {
+ printf("len is unexpected value (src: [%s] %s, ", locale, src);
+ printf("actual: %d (", len);
+ printUChars(outBuf, len);
+ printf("), expected: %d (", expected_len);
+ printUChars(expected_value, expected_len);
+ printf("))\n");
+ m_success = false;
+ } else {
+ printf("[%s] %s: ", locale, src);
+ printUChars(outBuf, len);
+// Lengths match, so compare the label contents unit by unit.
+ if (memcmp(outBuf, expected_value, sizeof(UChar)*expected_len) != 0) {
+ printf(", expected ");
+ printUChars(expected_value, expected_len);
+ m_success = false;
+ }
+ printf("\n");
+ }
+}
+// Checks that UTF-8 string |src| in |locale| yields the label whose UChars are listed in the variadic arguments (none = empty label).
+#define TEST_GET_UTF8STR_INDEX(src, locale, ...) \
+ ({ \
+ UChar uc_expected[] = {__VA_ARGS__}; \
+ int32_t len = sizeof(uc_expected)/sizeof(UChar); \
+ testGetIndex((src), (locale), len, uc_expected); \
+ })
+// Same check for a single UChar |src|, which is first converted to a UTF-8 string.
+#define TEST_GET_UCHAR_INDEX(src, locale, ...) \
+ ({ \
+ std::string utf8_str; \
+ UnicodeString((UChar) (src)).toUTF8String(utf8_str); \
+ TEST_GET_UTF8STR_INDEX(utf8_str.c_str(), (locale), __VA_ARGS__); \
+ })
+// NOTE: despite its name, this one test method covers every locale case below (en, ja, ko, cs, fr, da, de, zh_CN, zh_TW, th, ar, he).
+void TestExecutor::testEnglish() {
+ printf("testEnglish()\n");
+
+ // English [A-Z]
+ TEST_GET_UTF8STR_INDEX("Allen", "en", 'A');
+ TEST_GET_UTF8STR_INDEX("allen", "en", 'A');
+ TEST_GET_UTF8STR_INDEX("123456", "en", '#');
+ TEST_GET_UTF8STR_INDEX("+1 (123) 456-7890", "en", '#');
+ TEST_GET_UTF8STR_INDEX("(33) 44.55.66.08", "en", '#');
+ TEST_GET_UTF8STR_INDEX("123 Jump", "en", '#');
+ // Arabic-Indic digits (23456) are also treated as a numeric prefix
+ TEST_GET_UTF8STR_INDEX("\u0662\u0663\u0664\u0665\u0666", "en", '#');
+
+ // Japanese
+ // sorts hiragana/katakana, Kanji/Chinese, English, other
+ // …, あ, か, さ, た, な, は, ま, や, ら, わ, …
+ // hiragana "a"
+ TEST_GET_UCHAR_INDEX(0x3041, "ja", 0x3042);
+ // katakana "a"
+ TEST_GET_UCHAR_INDEX(0x30A1, "ja", 0x3042);
+
+ // Kanji (sorts to inflow section)
+ TEST_GET_UCHAR_INDEX(0x65E5, "ja", 0x4ed6);
+ // English
+ TEST_GET_UTF8STR_INDEX("Smith", "ja", 'S');
+ TEST_GET_UTF8STR_INDEX("234567", "ja", '#');
+ // Chinese (sorts to inflow section)
+ TEST_GET_UCHAR_INDEX(0x6c88 /* Shen/Chen */, "ja", 0x4ed6);
+ // Korean Hangul (sorts to overflow section)
+ TEST_GET_UCHAR_INDEX(0x1100, "ja", /* null */ );
+
+ // Korean (sorts Korean, then English)
+ // …, ᄀ, ᄂ, ᄃ, ᄅ, ᄆ, ᄇ, ᄉ, ᄋ, ᄌ, ᄎ, ᄏ, ᄐ, ᄑ, ᄒ, …
+ TEST_GET_UCHAR_INDEX(0x1100, "ko", 0x1100);
+ TEST_GET_UCHAR_INDEX(0x3131, "ko", 0x1100);
+ TEST_GET_UCHAR_INDEX(0x1101, "ko", 0x1100);
+ TEST_GET_UCHAR_INDEX(0x1161, "ko", 0x1112);
+
+ // Czech
+ // …, [A-C], Č,[D-H], CH, [I-R], Ř, S, Š, [T-Z], Ž, …
+ TEST_GET_UTF8STR_INDEX("Cena", "cs", 'C');
+ TEST_GET_UTF8STR_INDEX("Čáp", "cs", 0x010c);
+ TEST_GET_UTF8STR_INDEX("Ruda", "cs", 'R');
+ TEST_GET_UTF8STR_INDEX("Řada", "cs", 0x0158);
+ TEST_GET_UTF8STR_INDEX("Selka", "cs", 'S');
+ TEST_GET_UTF8STR_INDEX("Šála", "cs", 0x0160);
+ TEST_GET_UTF8STR_INDEX("Zebra", "cs", 'Z');
+ TEST_GET_UTF8STR_INDEX("Žába", "cs", 0x017d);
+ TEST_GET_UTF8STR_INDEX("Chata", "cs", 'C', 'H');
+
+ // French: [A-Z] (no accented chars)
+ TEST_GET_UTF8STR_INDEX("Øfer", "fr", 'O');
+ TEST_GET_UTF8STR_INDEX("Œster", "fr", 'O');
+
+ // Danish: [A-Z], Æ, Ø, Å
+ TEST_GET_UTF8STR_INDEX("Ænes", "da", 0xc6);
+ TEST_GET_UTF8STR_INDEX("Øfer", "da", 0xd8);
+ TEST_GET_UTF8STR_INDEX("Œster", "da", 0xd8);
+ TEST_GET_UTF8STR_INDEX("Ågård", "da", 0xc5);
+
+ // German: [A-Z] (no ß or umlauted characters in standard alphabet)
+ TEST_GET_UTF8STR_INDEX("ßind", "de", 'S');
+
+ // Simplified Chinese (default collator Pinyin): [A-Z]
+ // Shen/Chen (simplified): should be, usually, 'S' for name collator and 'C' for apps/other
+ TEST_GET_UCHAR_INDEX(0x6c88 /* Shen/Chen */, "zh_CN", 'C');
+ // Shen/Chen (traditional)
+ TEST_GET_UCHAR_INDEX(0x700b, "zh_CN", 'S');
+ // Jia/Gu: should be, usually, 'J' for name collator and 'G' for apps/other
+ TEST_GET_UCHAR_INDEX(0x8d3e /* Jia/Gu */, "zh_CN", 'J');
+
+ // Traditional Chinese
+ // …, 一, 丁, 丈, 不, 且, 丞, 串, 並, 亭, 乘, 乾, 傀, 亂, 僎, 僵, 儐, 償, 叢, 儳, 嚴, 儷, 儻, 囌, 囑, 廳, …
+ TEST_GET_UCHAR_INDEX(0x6c88 /* Shen/Chen */, "zh_TW", 0x5080);
+ TEST_GET_UCHAR_INDEX(0x700b /* Shen/Chen */, "zh_TW", 0x53e2);
+ TEST_GET_UCHAR_INDEX(0x8d3e /* Jia/Gu */, "zh_TW", 0x5080);
+
+ // Thai (sorts English then Thai)
+ // …, ก, ข, ฃ, ค, ฅ, ฆ, ง, จ, ฉ, ช, ซ, ฌ, ญ, ฎ, ฏ, ฐ, ฑ, ฒ, ณ, ด, ต, ถ, ท, ธ, น, บ, ป, ผ, ฝ, พ, ฟ, ภ, ม, ย, ร, ฤ, ล, ฦ, ว, ศ, ษ, ส, ห, ฬ, อ, ฮ, …,
+
+ TEST_GET_UTF8STR_INDEX("\u0e2d\u0e07\u0e04\u0e4c\u0e40\u0e25\u0e47\u0e01",
+ "th", 0xe2d);
+ TEST_GET_UTF8STR_INDEX("\u0e2a\u0e34\u0e07\u0e2b\u0e40\u0e2a\u0e19\u0e35",
+ "th", 0xe2a);
+ // Thai numbers ((02) 432-0281)
+ TEST_GET_UTF8STR_INDEX("(\u0e50\u0e52) \u0e54\u0e53\u0e52-"
+ "\u0e50\u0e52\u0e58\u0e51", "th", '#');
+
+ // Arabic (sorts English then Arabic)
+ // …, ا, ب, ت, ث, ج, ح, خ, د, ذ, ر, ز, س, ش, ص, ض, ط, ظ, ع, غ, ف, ق, ك, ل, م, ن, ه, و, ي, …
+ TEST_GET_UTF8STR_INDEX("\u0646\u0648\u0631" /* Noor */, "ar", 0x646);
+ // Arabic-Indic digits (23456)
+ TEST_GET_UTF8STR_INDEX("\u0662\u0663\u0664\u0665\u0666", "ar", '#');
+
+ // Hebrew (sorts English then Hebrew)
+ // …, א, ב, ג, ד, ה, ו, ז, ח, ט, י, כ, ל, מ, נ, ס, ע, פ, צ, ק, ר, ש, ת, …
+ TEST_GET_UTF8STR_INDEX("\u05e4\u05e8\u05d9\u05d3\u05de\u05df", "he", 0x5e4);
+}
+// Entry point: the process exit status is 0 when all tests pass and 1 otherwise.
+int main() {
+ TestExecutor executor;
+ if(executor.DoAllTests()) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp
deleted file mode 100644
index 796eaa2..0000000
--- a/android/PhoneticStringUtils.cpp
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "PhoneticStringUtils.h"
-#include <utils/Unicode.h>
-
-// We'd like 0 length string last of sorted list. So when input string is NULL
-// or 0 length string, we use these instead.
-#define CODEPOINT_FOR_NULL_STR 0xFFFD
-#define STR_FOR_NULL_STR "\xEF\xBF\xBD"
-
-// We assume that users will not notice strings not sorted properly when the
-// first 128 characters are the same.
-#define MAX_CODEPOINTS 128
-
-namespace android {
-
-// Get hiragana from halfwidth katakana.
-static int GetHiraganaFromHalfwidthKatakana(char32_t codepoint,
- char32_t next_codepoint,
- bool *next_is_consumed) {
- if (codepoint < 0xFF66 || 0xFF9F < codepoint) {
- return codepoint;
- }
-
- switch (codepoint) {
- case 0xFF66: // wo
- return 0x3092;
- case 0xFF67: // xa
- return 0x3041;
- case 0xFF68: // xi
- return 0x3043;
- case 0xFF69: // xu
- return 0x3045;
- case 0xFF6A: // xe
- return 0x3047;
- case 0xFF6B: // xo
- return 0x3049;
- case 0xFF6C: // xya
- return 0x3083;
- case 0xFF6D: // xyu
- return 0x3085;
- case 0xFF6E: // xyo
- return 0x3087;
- case 0xFF6F: // xtsu
- return 0x3063;
- case 0xFF70: // -
- return 0x30FC;
- case 0xFF9C: // wa
- return 0x308F;
- case 0xFF9D: // n
- return 0x3093;
- break;
- default: {
- if (0xFF71 <= codepoint && codepoint <= 0xFF75) {
- // a, i, u, e, o
- if (codepoint == 0xFF73 && next_codepoint == 0xFF9E) {
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x3094; // vu
- } else {
- return 0x3042 + (codepoint - 0xFF71) * 2;
- }
- } else if (0xFF76 <= codepoint && codepoint <= 0xFF81) {
- // ka - chi
- if (next_codepoint == 0xFF9E) {
- // "dakuten" (voiced mark)
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x304B + (codepoint - 0xFF76) * 2 + 1;
- } else {
- return 0x304B + (codepoint - 0xFF76) * 2;
- }
- } else if (0xFF82 <= codepoint && codepoint <= 0xFF84) {
- // tsu, te, to (skip xtsu)
- if (next_codepoint == 0xFF9E) {
- // "dakuten" (voiced mark)
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x3064 + (codepoint - 0xFF82) * 2 + 1;
- } else {
- return 0x3064 + (codepoint - 0xFF82) * 2;
- }
- } else if (0xFF85 <= codepoint && codepoint <= 0xFF89) {
- // na, ni, nu, ne, no
- return 0x306A + (codepoint - 0xFF85);
- } else if (0xFF8A <= codepoint && codepoint <= 0xFF8E) {
- // ha, hi, hu, he, ho
- if (next_codepoint == 0xFF9E) {
- // "dakuten" (voiced mark)
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x306F + (codepoint - 0xFF8A) * 3 + 1;
- } else if (next_codepoint == 0xFF9F) {
- // "han-dakuten" (half voiced mark)
- if (next_is_consumed != NULL) {
- *next_is_consumed = true;
- }
- return 0x306F + (codepoint - 0xFF8A) * 3 + 2;
- } else {
- return 0x306F + (codepoint - 0xFF8A) * 3;
- }
- } else if (0xFF8F <= codepoint && codepoint <= 0xFF93) {
- // ma, mi, mu, me, mo
- return 0x307E + (codepoint - 0xFF8F);
- } else if (0xFF94 <= codepoint && codepoint <= 0xFF96) {
- // ya, yu, yo
- return 0x3084 + (codepoint - 0xFF94) * 2;
- } else if (0xFF97 <= codepoint && codepoint <= 0xFF9B) {
- // ra, ri, ru, re, ro
- return 0x3089 + (codepoint - 0xFF97);
- }
- // Note: 0xFF9C, 0xFF9D are handled above
- } // end of default
- }
-
- return codepoint;
-}
-
-// Assuming input is hiragana, convert the hiragana to "normalized" hiragana.
-static int GetNormalizedHiragana(int codepoint) {
- if (codepoint < 0x3040 || 0x309F < codepoint) {
- return codepoint;
- }
-
- // TODO: should care (semi-)voiced mark (0x3099, 0x309A).
-
- // Trivial kana conversions.
- // e.g. xa => a
- switch (codepoint) {
- case 0x3041:
- case 0x3043:
- case 0x3045:
- case 0x3047:
- case 0x3049:
- case 0x3063:
- case 0x3083:
- case 0x3085:
- case 0x3087:
- case 0x308E: // xwa
- return codepoint + 1;
- case 0x3095: // xka
- return 0x304B;
- case 0x3096: // xke
- return 0x3051;
- case 0x31F0: // xku
- return 0x304F;
- case 0x31F1: // xsi
- return 0x3057;
- case 0x31F2: // xsu
- return 0x3059;
- case 0x31F3: // xto
- return 0x3068;
- case 0x31F4: // xnu
- return 0x306C;
- case 0x31F5: // xha
- return 0x306F;
- case 0x31F6: // xhi
- return 0x3072;
- case 0x31F7: // xhu
- return 0x3075;
- case 0x31F8: // xhe
- return 0x3078;
- case 0x31F9: // xho
- return 0x307B;
- case 0x31FA: // xmu
- return 0x3080;
- case 0x31FB: // xra
- case 0x31FC: // xri
- case 0x31FD: // xru
- case 0x31FE: // xre
- case 0x31FF: // xro
- // ra: 0x3089
- return 0x3089 + (codepoint - 0x31FB);
- default:
- return codepoint;
- }
-}
-
-static int GetNormalizedKana(char32_t codepoint,
- char32_t next_codepoint,
- bool *next_is_consumed) {
- // First, convert fullwidth katakana and halfwidth katakana to hiragana.
- if (0x30A1 <= codepoint && codepoint <= 0x30F6) {
- // Make fullwidth katakana same as hiragana.
- // 96 == 0x30A1 - 0x3041c
- codepoint = codepoint - 96;
- } else if (codepoint == 0x309F) {
- // Digraph YORI; Yo
- codepoint = 0x3088;
- } else if (codepoint == 0x30FF) {
- // Digraph KOTO; Ko
- codepoint = 0x3053;
- } else {
- codepoint = GetHiraganaFromHalfwidthKatakana(
- codepoint, next_codepoint, next_is_consumed);
- }
-
- // Normalize Hiragana.
- return GetNormalizedHiragana(codepoint);
-}
-
-int GetNormalizedCodePoint(char32_t codepoint,
- char32_t next_codepoint,
- bool *next_is_consumed) {
- if (next_is_consumed != NULL) {
- *next_is_consumed = false;
- }
-
- if (codepoint <= 0x0020 || codepoint == 0x3000) {
- // Whitespaces. Keep it as is.
- return codepoint;
- } else if ((0x0021 <= codepoint && codepoint <= 0x007E) ||
- (0xFF01 <= codepoint && codepoint <= 0xFF5E)) {
- // Ascii and fullwidth ascii. Keep it as is
- return codepoint;
- } else if (codepoint == 0x02DC || codepoint == 0x223C) {
- // tilde
- return 0xFF5E;
- } else if (codepoint <= 0x3040 ||
- (0x3100 <= codepoint && codepoint < 0xFF00) ||
- codepoint == CODEPOINT_FOR_NULL_STR) {
- // Keep it as is.
- return codepoint;
- }
-
- // Below is Kana-related handling.
-
- return GetNormalizedKana(codepoint, next_codepoint, next_is_consumed);
-}
-
-static bool GetExpectedString(
- const char *src, char **dst, size_t *dst_len,
- int (*get_codepoint_function)(char32_t, char32_t, bool*)) {
- if (dst == NULL || dst_len == NULL) {
- return false;
- }
-
- if (src == NULL || *src == '\0') {
- src = STR_FOR_NULL_STR;
- }
-
- char32_t codepoints[MAX_CODEPOINTS]; // if array size is changed the for loop needs to be changed
-
- ssize_t src_len = utf8_length(src);
- if (src_len <= 0) {
- return false;
- }
-
- bool next_is_consumed;
- size_t j = 0;
- for (size_t i = 0; i < (size_t)src_len && j < MAX_CODEPOINTS;) {
- int32_t ret = utf32_from_utf8_at(src, src_len, i, &i);
- if (ret < 0) {
- // failed to parse UTF-8
- return false;
- }
- ret = get_codepoint_function(
- static_cast<char32_t>(ret),
- i + 1 < (size_t)src_len ? src[i + 1] : 0,
- &next_is_consumed);
- if (ret > 0) {
- codepoints[j] = static_cast<char32_t>(ret);
- j++;
- }
- if (next_is_consumed) {
- i++;
- }
- }
- size_t length = j;
-
- if (length == 0) {
- // If all of codepoints are invalid, we place the string at the end of
- // the list.
- codepoints[0] = 0x10000 + CODEPOINT_FOR_NULL_STR;
- length = 1;
- }
-
- ssize_t new_len = utf32_to_utf8_length(codepoints, length);
- if (new_len < 0) {
- return false;
- }
-
- *dst = static_cast<char *>(malloc(new_len + 1));
- if (*dst == NULL) {
- return false;
- }
-
- utf32_to_utf8(codepoints, length, *dst);
-
- *dst_len = new_len;
- return true;
-}
-
-bool GetNormalizedString(const char *src, char **dst, size_t *len) {
- return GetExpectedString(src, dst, len, GetNormalizedCodePoint);
-}
-
-} // namespace android
diff --git a/android/PhoneticStringUtils.h b/android/PhoneticStringUtils.h
deleted file mode 100644
index a567a27..0000000
--- a/android/PhoneticStringUtils.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _ANDROID_PHONETIC_STRING_UTILS_H
-#define _ANDROID_PHONETIC_STRING_UTILS_H
-
-#include <string.h> // For size_t.
-#include <utils/String8.h>
-
-namespace android {
-
-// Returns codepoint which is "normalized", whose definition depends on each
-// Locale. Note that currently this function normalizes only Japanese; the
-// other characters are remained as is.
-// The variable "next_is_consumed" is set to true if "next_codepoint"
-// is "consumed" (e.g. Japanese halfwidth katakana's voiced mark is consumed
-// when previous "codepoint" is appropriate, like half-width "ka").
-//
-// In Japanese, "normalized" means that half-width and full-width katakana is
-// appropriately converted to hiragana.
-int GetNormalizedCodePoint(char32_t codepoint,
- char32_t next_codepoint,
- bool *next_is_consumed);
-
-// Pushes Utf8 expression of "codepoint" to "dst". Returns true when successful.
-// If input is invalid or the length of the destination is not enough,
-// returns false.
-bool GetUtf8FromCodePoint(int codepoint, char *dst, size_t len, size_t *index);
-
-// Creates a "phonetically sortable" Utf8 string and push it into "dst".
-// *dst must be freed after being used outside.
-// If "src" is NULL or its length is 0, "dst" is set to \uFFFF.
-//
-// Note that currently this function considers only Japanese.
-bool GetPhoneticallySortableString(const char *src, char **dst, size_t *len);
-
-// Creates a "normalized" Utf8 string and push it into "dst". *dst must be
-// freed after being used outside.
-// If "src" is NULL or its length is 0, "dst" is set to \uFFFF.
-//
-// Note that currently this function considers only Japanese.
-bool GetNormalizedString(const char *src, char **dst, size_t *len);
-
-} // namespace android
-
-#endif
diff --git a/android/PhoneticStringUtilsTest.cpp b/android/PhoneticStringUtilsTest.cpp
deleted file mode 100644
index 9885823..0000000
--- a/android/PhoneticStringUtilsTest.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "PhoneticStringUtils.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <utils/String8.h>
-
-using namespace android;
-
-class TestExecutor {
- public:
- TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {}
- bool DoAllTests();
- private:
- void DoOneTest(void (TestExecutor::*test)());
-
- void testUtf32At();
- void testGetUtf8FromUtf32();
- void testGetNormalizedString();
- void testLongString();
-
- // Note: When adding a test, do not forget to add it to DoOneTest().
-
- int m_total_count;
- int m_success_count;
-
- bool m_success;
-};
-
-#define ASSERT_EQ_VALUE(input, expected) \
- ({ \
- if ((expected) != (input)) { \
- printf("0x%X(result) != 0x%X(expected)\n", input, expected); \
- m_success = false; \
- return; \
- } \
- })
-
-#define EXPECT_EQ_VALUE(input, expected) \
- ({ \
- if ((expected) != (input)) { \
- printf("0x%X(result) != 0x%X(expected)\n", input, expected); \
- m_success = false; \
- } \
- })
-
-
-bool TestExecutor::DoAllTests() {
- DoOneTest(&TestExecutor::testUtf32At);
- DoOneTest(&TestExecutor::testGetUtf8FromUtf32);
- DoOneTest(&TestExecutor::testGetNormalizedString);
- DoOneTest(&TestExecutor::testLongString);
-
- printf("Test total: %d\nSuccess: %d\nFailure: %d\n",
- m_total_count, m_success_count, m_total_count - m_success_count);
-
- bool success = m_total_count == m_success_count;
- printf("\n%s\n", success ? "Success" : "Failure");
-
- return success;
-}
-
-void TestExecutor::DoOneTest(void (TestExecutor::*test)()) {
- m_success = true;
-
- (this->*test)();
-
- ++m_total_count;
- m_success_count += m_success ? 1 : 0;
-}
-
-#define TEST_GET_UTF32AT(src, index, expected_next, expected_value) \
- ({ \
- size_t next; \
- int32_t ret = utf32_from_utf8_at(src, strlen(src), index, &next); \
- if (ret < 0) { \
- printf("getUtf32At() returned negative value (src: %s, index: %d)\n", \
- (src), (index)); \
- m_success = false; \
- } else if (next != (expected_next)) { \
- printf("next is unexpected value (src: %s, actual: %u, expected: %u)\n", \
- (src), next, (expected_next)); \
- } else { \
- EXPECT_EQ_VALUE(ret, (expected_value)); \
- } \
- })
-
-void TestExecutor::testUtf32At() {
- printf("testUtf32At()\n");
-
- TEST_GET_UTF32AT("a", 0, 1, 97);
- // Japanese hiragana "a"
- TEST_GET_UTF32AT("\xE3\x81\x82", 0, 3, 0x3042);
- // Japanese fullwidth katakana "a" with ascii a
- TEST_GET_UTF32AT("a\xE3\x82\xA2", 1, 4, 0x30A2);
-
- // 2 PUA
- TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 0, 4, 0xFE000);
- TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 4, 8, 0xFE008);
-}
-
-
-#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \
- ({ \
- char32_t codepoints[1] = {codepoint}; \
- status_t ret = string8.setTo(codepoints, 1); \
- if (ret != NO_ERROR) { \
- printf("GetUtf8FromCodePoint() returned false at 0x%04X\n", codepoint); \
- m_success = false; \
- } else { \
- const char* string = string8.string(); \
- if (strcmp(string, expected) != 0) { \
- printf("Failed at codepoint 0x%04X\n", codepoint); \
- for (const char *ch = string; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("!= "); \
- for (const char *ch = expected; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("\n"); \
- m_success = false; \
- } \
- } \
- })
-
-void TestExecutor::testGetUtf8FromUtf32() {
- printf("testGetUtf8FromUtf32()\n");
- String8 string8;
-
- EXPECT_EQ_CODEPOINT_UTF8('a', "\x61");
- // Armenian capital letter AYB (2 bytes in UTF8)
- EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0");
- // Japanese 'a' (3 bytes in UTF8)
- EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82");
- // Kanji
- EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5");
- // PUA (4 byets in UTF8)
- EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96");
- EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2");
-}
-
-#define EXPECT_EQ_UTF8_UTF8(src, expected) \
- ({ \
- if (!GetNormalizedString(src, &dst, &len)) { \
- printf("GetNormalizedSortableString() returned false.\n"); \
- m_success = false; \
- } else { \
- if (strcmp(dst, expected) != 0) { \
- for (const char *ch = dst; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("!= "); \
- for (const char *ch = expected; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("\n"); \
- m_success = false; \
- } \
- free(dst); \
- } \
- })
-
-void TestExecutor::testGetNormalizedString() {
- printf("testGetNormalizedString()\n");
- char *dst;
- size_t len;
-
- // halfwidth alphabets/symbols -> keep it as is.
- EXPECT_EQ_UTF8_UTF8("ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()",
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()");
- EXPECT_EQ_UTF8_UTF8("abcdefghijklmnopqrstuvwxyz[]{}\\@/",
- "abcdefghijklmnopqrstuvwxyz[]{}\\@/");
-
- // halfwidth/fullwidth-katakana -> hiragana
- EXPECT_EQ_UTF8_UTF8(
- "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
- "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
-
- // whitespace -> keep it as is.
- EXPECT_EQ_UTF8_UTF8(" \t", " \t");
-}
-
-void TestExecutor::testLongString() {
- printf("testLongString()\n");
- char * dst;
- size_t len;
- EXPECT_EQ_UTF8_UTF8("Qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqtttttttttttttttttttttttttttttttttttttttttttttttttgggggggggggggggggggggggggggggggggggggggbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
- "Qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqtttttttttttttttttttttttttttttttttttttttttttttttttggggggggggggggggggggggggggggggggggg");
-}
-
-
-int main() {
- TestExecutor executor;
- if(executor.DoAllTests()) {
- return 0;
- } else {
- return 1;
- }
-}
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp
index 5daf15e..fe826fd 100644
--- a/android/sqlite3_android.cpp
+++ b/android/sqlite3_android.cpp
@@ -509,14 +509,8 @@
//// PHONEBOOK_COLLATOR
- // The collator may be removed in the near future. Do not depend on it.
- // TODO: it might be better to have another function for registering phonebook collator.
status = U_ZERO_ERROR;
- if (strcmp(systemLocale, "ja") == 0 || strcmp(systemLocale, "ja_JP") == 0) {
- collator = ucol_open("ja@collation=phonebook", &status);
- } else {
- collator = ucol_open(systemLocale, &status);
- }
+ collator = ucol_open(systemLocale, &status);
if (U_FAILURE(status)) {
return -1;
}