Merge commit 'korg/cupcake'
diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp
index 7c8d185..5f8781c 100644
--- a/android/PhoneticStringUtils.cpp
+++ b/android/PhoneticStringUtils.cpp
@@ -89,11 +89,10 @@
}
if (codepoint <= 0x0020 || codepoint == 0x3000) {
- // Whitespace.
- // Skip without increment of the variable "new_len".
+ // Whitespace should be ignored.
// Note: Formally, more "whitespace" exist. This block only
// handles part of them
- return 0x0020;
+ return -1;
} else if ((0x0021 <= codepoint && codepoint <= 0x007E) ||
(0xFF01 <= codepoint && codepoint <= 0xFF5E)) {
// Ascii and fullwidth ascii
@@ -369,7 +368,7 @@
for (codepoint_index = 0, i = 0, next = 0;
static_cast<size_t>(i) < src_len &&
codepoint_index < MAX_CODEPOINTS;
- i = next, codepoint_index++) {
+ i = next) {
int codepoint = GetCodePointFromUtf8(src, src_len, i, &next);
if (codepoint <= 0) {
return false;
@@ -384,12 +383,16 @@
GetPhoneticallySortableCodePoint(codepoint,
next_codepoint,
&next_is_consumed);
-
// dakuten (voiced mark) or han-dakuten (half-voiced mark) existed.
if (next_is_consumed) {
next = tmp_next;
}
+ if (codepoints[codepoint_index] < 0) {
+ // Do not increment codepoint_index.
+ continue;
+ }
+
if (codepoints[codepoint_index] < 128) { // 1 << 7
new_len++;
} else if (codepoints[codepoint_index] < 2048) {
@@ -407,9 +410,19 @@
} else {
new_len += 6;
}
+
+ codepoint_index++;
}
}
+ if (codepoint_index == 0) {
+ // If no codepoint was kept (e.g. the input consists only of ignored
+ // whitespace), place the string at the end of the list.
+ codepoints[0] = 0x10000 + CODEPOINT_FOR_NULL_STR;
+ codepoint_index = 1;
+ new_len = 4;
+ }
+
new_len += 1; // For '\0'.
*dst = static_cast<char *>(malloc(sizeof(char) * new_len));
diff --git a/android/PhoneticStringUtils.h b/android/PhoneticStringUtils.h
index 5649783..7ebf9e0 100644
--- a/android/PhoneticStringUtils.h
+++ b/android/PhoneticStringUtils.h
@@ -29,7 +29,8 @@
// depends on each Locale. Note that currently this function considers only
// Japanese. The variable "next_is_consumed" is set to true if "next_codepoint"
// is "consumed" (e.g. Japanese halfwidth katakana's voiced mark is consumed
-// when previous "codepoint" is appropriate)
+// when previous "codepoint" is appropriate). If the codepoint should not be
+// considered when sorting (e.g. whitespace), -1 is returned.
int GetPhoneticallySortableCodePoint(int codepoint,
int next_codepoint,
bool *next_is_consumed);
diff --git a/android/PhoneticStringUtilsTest.cpp b/android/PhoneticStringUtilsTest.cpp
index ae9df2a..0541007 100644
--- a/android/PhoneticStringUtilsTest.cpp
+++ b/android/PhoneticStringUtilsTest.cpp
@@ -32,6 +32,7 @@
void testGetCodePointFromUtf8();
void testGetPhoneticallySortableCodePointAscii();
void testGetPhoneticallySortableCodePointKana();
+ void testGetPhoneticallySortableCodePointWhitespaceOnly();
void testGetPhoneticallySortableCodePointSimpleCompare();
void testGetUtf8FromCodePoint();
void testGetPhoneticallySortableString();
@@ -66,6 +67,7 @@
DoOneTest(&TestExecutor::testGetCodePointFromUtf8);
DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii);
DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana);
+ DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly);
DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare);
DoOneTest(&TestExecutor::testGetUtf8FromCodePoint);
DoOneTest(&TestExecutor::testGetPhoneticallySortableString);
@@ -121,6 +123,8 @@
&next_is_consumed);
if (halfwidth[i] < 0) {
printf("returned value become negative at 0x%04X", codepoint);
+ m_success = false;
+ return;
}
if (next_is_consumed) {
printf("next_is_consumed become true at 0x%04X", codepoint);
@@ -133,6 +137,8 @@
&next_is_consumed);
if (fullwidth[i] < 0) {
printf("returned value become negative at 0x%04X", codepoint);
+ m_success = false;
+ return;
}
if (next_is_consumed) {
printf("next_is_consumed become true at 0x%04X", codepoint);
@@ -158,6 +164,8 @@
&next_is_consumed);
if (hiragana[i] < 0) {
printf("returned value become negative at 0x%04X", codepoint);
+ m_success = false;
+ return;
}
if (next_is_consumed) {
printf("next_is_consumed become true at 0x%04X", codepoint);
@@ -171,6 +179,8 @@
&next_is_consumed);
if (fullwidth_katakana[i] < 0) {
printf("returned value become negative at 0x%04X", codepoint);
+ m_success = false;
+ return;
}
if (next_is_consumed) {
printf("next_is_consumed become true at 0x%04X", codepoint);
@@ -221,6 +231,19 @@
}
}
+// Checks that GetPhoneticallySortableCodePoint() returns -1 for whitespace
+// codepoints (halfwidth space, fullwidth space and tab).
+// NULL is passed for next_is_consumed; this test assumes the callee
+// tolerates a NULL out-parameter.
+void TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly() {
+    // Banner ends with "()\n" like the other tests so that following output
+    // starts on its own line.
+    printf("testGetPhoneticallySortableCodePointWhitespaceOnly()\n");
+    // Halfwidth space
+    int result = GetPhoneticallySortableCodePoint(0x0020, 0x0061, NULL);
+    ASSERT_EQ_VALUE(result, -1);
+    // Fullwidth space
+    result = GetPhoneticallySortableCodePoint(0x3000, 0x0062, NULL);
+    ASSERT_EQ_VALUE(result, -1);
+    // Tab
+    result = GetPhoneticallySortableCodePoint(0x0009, 0x0062, NULL);
+    ASSERT_EQ_VALUE(result, -1);
+}
+
void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() {
printf("testGetPhoneticallySortableCodePointSimpleCompare()\n");
@@ -345,6 +368,9 @@
EXPECT_EQ_UTF8_UTF8(
"\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
"\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
+
+ // Whitespace-only input -> sentinel string that should be placed last.
+ EXPECT_EQ_UTF8_UTF8(" \t", "\xF0\x9F\xBF\xBD");
}
int main() {
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp
index 55dcd5a..27334ef 100644
--- a/android/sqlite3_android.cpp
+++ b/android/sqlite3_android.cpp
@@ -82,7 +82,9 @@
size_t len;
if (!android::GetPhoneticallySortableString(src, &ret, &len)) {
- sqlite3_result_null(context);
+ // Put this text at the end of a list.
+ sqlite3_result_text(context, "\xF0\x9F\xBF\xBD", -1, SQLITE_STATIC);
+ // sqlite3_result_null(context);
} else {
sqlite3_result_text(context, ret, len, free);
}