blob: 68674f4e8a2d89f4272282d0b68350168d431e7d [file] [log] [blame]
/*
* Copyright 2010, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <unicode/alphaindex.h>
#include <unicode/ucol.h>
#include <unicode/uiter.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
#include <unicode/uloc.h>
#include <utils/Mutex.h>
#include <utils/RefBase.h>
#include "PhonebookIndex.h"
#define MIN_OUTPUT_SIZE 6 // Minimum required size for the output buffer (in bytes)
namespace android {
// Wrapper class to enable using libutil SmartPointers with AlphabeticIndex.
class AlphabeticIndexRef : public RefBase {
public:
AlphabeticIndexRef(const char *locale, UErrorCode &status) :
m_index(locale, status), m_locale(NULL), m_isJapanese(false) {
if (U_FAILURE(status)) {
return;
}
m_locale = strdup(locale);
if (m_locale == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
char language[4];
uloc_getLanguage(locale, language, sizeof(language), &status);
if (U_FAILURE(status)) {
return;
}
m_isJapanese = (strcmp(language, ULOC_JAPANESE) == 0);
}
virtual ~AlphabeticIndexRef() { free(m_locale); }
AlphabeticIndex& operator*() { return m_index; }
AlphabeticIndex* operator->() { return &m_index; }
bool isLocale(const char *locale) const {
return (locale != NULL && m_locale != NULL &&
strcmp(m_locale, locale) == 0);
}
bool isJapanese() const { return m_isJapanese; }
int32_t getLabel(int32_t bucketIndex, UChar *labelBuf, int32_t labelBufSize);
private:
AlphabeticIndex m_index;
char *m_locale;
bool m_isJapanese;
};
int32_t AlphabeticIndexRef::getLabel(int32_t bucketIndex, UChar *labelBuf,
int32_t labelBufSize) {
UErrorCode status = U_ZERO_ERROR;
m_index.resetBucketIterator(status);
if (U_FAILURE(status)) {
return -1;
}
for(int i = 0; i <= bucketIndex; ++i) {
if (!m_index.nextBucket(status) || U_FAILURE(status)) {
return -1;
}
}
int32_t len;
if (m_index.getBucketLabelType() == U_ALPHAINDEX_NORMAL) {
len = m_index.getBucketLabel().extract(labelBuf, labelBufSize, status);
if (U_FAILURE(status)) {
return -1;
}
} else {
// Use no label for underflow/inflow/overflow buckets
labelBuf[0] = '\0';
len = 0;
}
return len;
}
static Mutex gIndexMutex;
static sp<AlphabeticIndexRef> gIndex;
/**
* Returns TRUE if the character belongs to a Hanzi unicode block
*/
static bool is_CJ(UChar32 c) {
return (uscript_hasScript(c, USCRIPT_HAN) ||
uscript_hasScript(c, USCRIPT_HIRAGANA) ||
uscript_hasScript(c, USCRIPT_KATAKANA));
}
static bool initIndexForLocale(const char *locale) {
if (locale == NULL) {
return false;
}
if (gIndex != NULL && gIndex->isLocale(locale)) {
return true;
}
UErrorCode status = U_ZERO_ERROR;
sp<AlphabeticIndexRef> newIndex(new AlphabeticIndexRef(locale, status));
if (newIndex == NULL || U_FAILURE(status)) {
return false;
}
// Always create labels for Latin characters if not present in native set
(*newIndex)->addLabels("en", status);
if (U_FAILURE(status)) {
return false;
}
if ((*newIndex)->getBucketCount(status) <= 0 || U_FAILURE(status)) {
return false;
}
gIndex = newIndex;
return true;
}
int32_t GetPhonebookIndex(UCharIterator *iter, const char *locale,
UChar *out, int32_t size, UBool *isError)
{
if (size < MIN_OUTPUT_SIZE) {
*isError = TRUE;
return 0;
}
*isError = FALSE;
out[0] = '\0';
iter->move(iter, 0, UITER_ZERO);
if (!iter->hasNext(iter)) { // Empty input string
return 0;
}
UnicodeString ustr;
bool prefixIsNonNumeric = false;
bool prefixIsNumeric = false;
while (iter->hasNext(iter)) {
UChar32 ch = uiter_next32(iter);
// Ignore standard phone number separators and identify any string
// that otherwise starts with a number.
if (!prefixIsNumeric && !prefixIsNonNumeric) {
if (u_isdigit(ch)) {
prefixIsNumeric = true;
} else if (!u_isspace(ch) && ch != '+' && ch != '(' &&
ch != ')' && ch != '.' && ch != '-' && ch != '#') {
prefixIsNonNumeric = true;
}
}
ustr.append(ch);
}
if (prefixIsNumeric) {
out[0] = '#';
return 1;
}
Mutex::Autolock autolock(gIndexMutex);
if (!initIndexForLocale(locale)) {
*isError = TRUE;
return 0;
}
UErrorCode status = U_ZERO_ERROR;
int32_t bucketIndex = (*gIndex)->getBucketIndex(ustr, status);
if (U_FAILURE(status)) {
*isError = TRUE;
return 0;
}
int32_t len = gIndex->getLabel(bucketIndex, out, size);
if (len < 0) {
*isError = TRUE;
return 0;
}
// For Japanese, label unclassified CJK ideographs with
// Japanese word meaning "misc" or "other"
if (gIndex->isJapanese() && len == 0 && is_CJ(ustr.char32At(0))) {
out[0] = 0x4ED6;
len = 1;
}
return len;
}
} // namespace android