// android/PhonebookIndex.cpp - platform/external/sqlite - Git at Google

 /*
  * Copyright 2010, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <stdlib.h>
 #include <ctype.h>
 #include <string.h>
 #include <stdio.h>

 #include <unicode/alphaindex.h>
 #include <unicode/ucol.h>
 #include <unicode/uiter.h>
 #include <unicode/ustring.h>
 #include <unicode/utypes.h>
 #include <unicode/uloc.h>
 #include <utils/Mutex.h>
 #include <utils/RefBase.h>

 #include "PhonebookIndex.h"

 // Smallest `size` GetPhonebookIndex() accepts; anything smaller is rejected
 // with *isError = TRUE and a return value of 0.
 // NOTE(review): the trailing comment says "bytes", but the same `size` value
 // is forwarded unchanged to UnicodeString::extract() as the destination
 // capacity -- confirm which unit callers are expected to pass.
 #define MIN_OUTPUT_SIZE 6       // Minimum required size for the output buffer (in bytes)

 namespace android {

 // Wrapper class to enable using libutil SmartPointers with AlphabeticIndex.
 class AlphabeticIndexRef : public RefBase {
 public:
     AlphabeticIndexRef(const char *locale, UErrorCode &status) :
         m_index(locale, status), m_locale(NULL), m_isJapanese(false) {
         if (U_FAILURE(status)) {
             return;
         }
         m_locale = strdup(locale);
         if (m_locale == NULL) {
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }
         char language[4];
         uloc_getLanguage(locale, language, sizeof(language), &status);
         if (U_FAILURE(status)) {
             return;
         }
         m_isJapanese = (strcmp(language, ULOC_JAPANESE) == 0);
     }
     virtual ~AlphabeticIndexRef() { free(m_locale); }

     AlphabeticIndex& operator*() { return m_index; }
     AlphabeticIndex* operator->() { return &m_index; }

     bool isLocale(const char *locale) const {
         return (locale != NULL && m_locale != NULL &&
                 strcmp(m_locale, locale) == 0);
     }
     bool isJapanese() const { return m_isJapanese; }
     int32_t getLabel(int32_t bucketIndex, UChar *labelBuf, int32_t labelBufSize);

 private:
     AlphabeticIndex m_index;
     char *m_locale;
     bool m_isJapanese;
 };

 int32_t AlphabeticIndexRef::getLabel(int32_t bucketIndex, UChar *labelBuf,
                                      int32_t labelBufSize) {
     UErrorCode status = U_ZERO_ERROR;
     m_index.resetBucketIterator(status);
     if (U_FAILURE(status)) {
         return -1;
     }
     for(int i = 0; i <= bucketIndex; ++i) {
         if (!m_index.nextBucket(status) || U_FAILURE(status)) {
             return -1;
         }
     }

     int32_t len;
     if (m_index.getBucketLabelType() == U_ALPHAINDEX_NORMAL) {
         len = m_index.getBucketLabel().extract(labelBuf, labelBufSize, status);
         if (U_FAILURE(status)) {
             return -1;
         }
     } else {
         // Use no label for underflow/inflow/overflow buckets
         labelBuf[0] = '\0';
         len = 0;
     }
     return len;
 }

 // Held by GetPhonebookIndex() for the whole time it initializes and queries
 // gIndex.
 static Mutex gIndexMutex;
 // Index built by the last successful initIndexForLocale() call; it is
 // replaced when a different locale string is requested.
 static sp<AlphabeticIndexRef> gIndex;

 /**
  * Returns true when uscript_hasScript() reports that |c| is used with the
  * Han, Hiragana or Katakana script.
  */
 static bool is_CJ(UChar32 c) {
     if (uscript_hasScript(c, USCRIPT_HAN)) {
         return true;
     }
     if (uscript_hasScript(c, USCRIPT_HIRAGANA)) {
         return true;
     }
     if (uscript_hasScript(c, USCRIPT_KATAKANA)) {
         return true;
     }
     return false;
 }

 static bool initIndexForLocale(const char *locale) {
     if (locale == NULL) {
         return false;
     }

     if (gIndex != NULL && gIndex->isLocale(locale)) {
         return true;
     }

     UErrorCode status = U_ZERO_ERROR;
     sp<AlphabeticIndexRef> newIndex(new AlphabeticIndexRef(locale, status));
     if (newIndex == NULL || U_FAILURE(status)) {
         return false;
     }
     // Always create labels for Latin characters if not present in native set
     (*newIndex)->addLabels("en", status);
     if (U_FAILURE(status)) {
         return false;
     }
     if ((*newIndex)->getBucketCount(status) <= 0 || U_FAILURE(status)) {
         return false;
     }

     gIndex = newIndex;
     return true;
 }

 /**
  * Computes the phonebook index label for the string behind |iter|.
  *
  * The input is read from its start to its end. If the first character that
  * is neither whitespace nor one of "+().-#" is a digit, the label is "#".
  * Otherwise the label comes from the alphabetic index for |locale|. For a
  * Japanese locale, a string that got no label and whose first code point is
  * Han/Hiragana/Katakana is labelled U+4ED6.
  *
  * @param iter    iterator over the input; it is rewound and then consumed
  * @param locale  locale used to select the alphabetic index
  * @param out     receives the label; the fixed one-character labels written
  *                here ('#' and U+4ED6) are followed by a NUL
  * @param size    capacity of |out|; must be at least MIN_OUTPUT_SIZE
  * @param isError set to TRUE when |size| is too small or the index could not
  *                be built or queried, FALSE otherwise
  * @return number of UChars in the label; 0 for empty input, for a string
  *         without a label, and on error
  */
 int32_t GetPhonebookIndex(UCharIterator *iter, const char *locale,
                           UChar *out, int32_t size, UBool *isError)
 {
     if (size < MIN_OUTPUT_SIZE) {
         *isError = TRUE;
         return 0;
     }

     *isError = FALSE;
     out[0] = '\0';
     iter->move(iter, 0, UITER_ZERO);
     if (!iter->hasNext(iter)) {   // Empty input string
         return 0;
     }
     UnicodeString ustr;
     bool prefixIsNonNumeric = false;
     bool prefixIsNumeric = false;
     while (iter->hasNext(iter)) {
         UChar32 ch = uiter_next32(iter);
         // Ignore standard phone number separators and identify any string
         // that otherwise starts with a number.
         if (!prefixIsNumeric && !prefixIsNonNumeric) {
             if (u_isdigit(ch)) {
                 prefixIsNumeric = true;
             } else if (!u_isspace(ch) && ch != '+' && ch != '(' &&
                        ch != ')' && ch != '.' && ch != '-' && ch != '#') {
                 prefixIsNonNumeric = true;
             }
         }
         ustr.append(ch);
     }
     if (prefixIsNumeric) {
         out[0] = '#';
         // Terminate the label like the other return paths do; index 1 is in
         // range because size >= MIN_OUTPUT_SIZE was checked above.
         out[1] = '\0';
         return 1;
     }

     // gIndex is shared; keep the lock for both initialization and lookup.
     Mutex::Autolock autolock(gIndexMutex);
     if (!initIndexForLocale(locale)) {
         *isError = TRUE;
         return 0;
     }

     UErrorCode status = U_ZERO_ERROR;
     int32_t bucketIndex = (*gIndex)->getBucketIndex(ustr, status);
     if (U_FAILURE(status)) {
         *isError = TRUE;
         return 0;
     }

     int32_t len = gIndex->getLabel(bucketIndex, out, size);
     if (len < 0) {
         *isError = TRUE;
         return 0;
     }

     // For Japanese, label unclassified CJK ideographs with
     // Japanese word meaning "misc" or "other"
     if (gIndex->isJapanese() && len == 0 && is_CJ(ustr.char32At(0))) {
         out[0] = 0x4ED6;
         out[1] = '\0';
         len = 1;
     }

     return len;
 }

 }  // namespace android
	/*
	* Copyright 2010, The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include <stdlib.h>
	#include <ctype.h>
	#include <string.h>
	#include <stdio.h>

	#include <unicode/alphaindex.h>
	#include <unicode/ucol.h>
	#include <unicode/uiter.h>
	#include <unicode/ustring.h>
	#include <unicode/utypes.h>
	#include <unicode/uloc.h>
	#include <utils/Mutex.h>
	#include <utils/RefBase.h>

	#include "PhonebookIndex.h"

	// Smallest `size` GetPhonebookIndex() accepts; anything smaller is rejected
	// with an error flag and a return value of 0.
	// NOTE(review): the trailing comment says "bytes", but the same `size` value
	// is forwarded unchanged to UnicodeString::extract() as the destination
	// capacity -- confirm which unit callers are expected to pass.
	#define MIN_OUTPUT_SIZE 6 // Minimum required size for the output buffer (in bytes)

	namespace android {

	// Wrapper class to enable using libutil SmartPointers with AlphabeticIndex.
	class AlphabeticIndexRef : public RefBase {
	public:
	AlphabeticIndexRef(const char *locale, UErrorCode &status) :
	m_index(locale, status), m_locale(NULL), m_isJapanese(false) {
	if (U_FAILURE(status)) {
	return;
	}
	m_locale = strdup(locale);
	if (m_locale == NULL) {
	status = U_MEMORY_ALLOCATION_ERROR;
	return;
	}
	char language[4];
	uloc_getLanguage(locale, language, sizeof(language), &status);
	if (U_FAILURE(status)) {
	return;
	}
	m_isJapanese = (strcmp(language, ULOC_JAPANESE) == 0);
	}
	virtual ~AlphabeticIndexRef() { free(m_locale); }

	AlphabeticIndex& operator*() { return m_index; }
	AlphabeticIndex* operator->() { return &m_index; }

	bool isLocale(const char *locale) const {
	return (locale != NULL && m_locale != NULL &&
	strcmp(m_locale, locale) == 0);
	}
	bool isJapanese() const { return m_isJapanese; }
	int32_t getLabel(int32_t bucketIndex, UChar *labelBuf, int32_t labelBufSize);

	private:
	AlphabeticIndex m_index;
	char *m_locale;
	bool m_isJapanese;
	};

	/**
	 * Copies the label of the bucket at position |bucketIndex| into |labelBuf|.
	 *
	 * The bucket iterator is reset and then advanced bucketIndex + 1 times, so
	 * every call walks the buckets from the beginning.
	 *
	 * @param bucketIndex  zero-based position of the wanted bucket
	 * @param labelBuf     destination buffer; element 0 is written
	 *                     unconditionally for a non-normal bucket
	 * @param labelBufSize capacity handed to UnicodeString::extract()
	 * @return the length reported by extract() for a normal bucket, 0 for any
	 *         other bucket type, or -1 when ICU reports a failure or the
	 *         iterator runs out of buckets first
	 */
	int32_t AlphabeticIndexRef::getLabel(int32_t bucketIndex, UChar *labelBuf,
	                                     int32_t labelBufSize) {
	    UErrorCode status = U_ZERO_ERROR;
	    m_index.resetBucketIterator(status);
	    if (U_FAILURE(status)) {
	        return -1;
	    }
	    // Step onto bucket 0, then forward until the requested bucket is current.
	    for (int i = 0; i <= bucketIndex; ++i) {
	        if (!m_index.nextBucket(status) || U_FAILURE(status)) {
	            return -1;
	        }
	    }

	    int32_t len;
	    if (m_index.getBucketLabelType() == U_ALPHAINDEX_NORMAL) {
	        len = m_index.getBucketLabel().extract(labelBuf, labelBufSize, status);
	        if (U_FAILURE(status)) {
	            return -1;
	        }
	    } else {
	        // Use no label for underflow/inflow/overflow buckets
	        labelBuf[0] = '\0';
	        len = 0;
	    }
	    return len;
	}

	// Held by GetPhonebookIndex() for the whole time it initializes and queries
	// gIndex.
	static Mutex gIndexMutex;
	// Index built by the last successful initIndexForLocale() call; it is
	// replaced when a different locale string is requested.
	static sp<AlphabeticIndexRef> gIndex;

	/**
	 * Returns true when uscript_hasScript() reports that |c| is used with the
	 * Han, Hiragana or Katakana script.
	 */
	static bool is_CJ(UChar32 c) {
	    return (uscript_hasScript(c, USCRIPT_HAN) ||
	            uscript_hasScript(c, USCRIPT_HIRAGANA) ||
	            uscript_hasScript(c, USCRIPT_KATAKANA));
	}

	/**
	 * Makes sure gIndex holds an index built for |locale|.
	 *
	 * The current gIndex is kept when it was created with the same locale string.
	 * Otherwise a new index is built, English labels are added to it, and it
	 * replaces gIndex only when every step succeeded and it reports at least one
	 * bucket. This function does not take gIndexMutex itself.
	 *
	 * @param locale locale identifier; NULL is rejected
	 * @return true when gIndex is ready for |locale|; false otherwise, in which
	 *         case gIndex is left unchanged
	 */
	static bool initIndexForLocale(const char *locale) {
	    if (locale == NULL) {
	        return false;
	    }

	    if (gIndex != NULL && gIndex->isLocale(locale)) {
	        return true;
	    }

	    UErrorCode status = U_ZERO_ERROR;
	    sp<AlphabeticIndexRef> newIndex(new AlphabeticIndexRef(locale, status));
	    if (newIndex == NULL || U_FAILURE(status)) {
	        return false;
	    }
	    // Always create labels for Latin characters if not present in native set
	    (*newIndex)->addLabels("en", status);
	    if (U_FAILURE(status)) {
	        return false;
	    }
	    if ((*newIndex)->getBucketCount(status) <= 0 || U_FAILURE(status)) {
	        return false;
	    }

	    gIndex = newIndex;
	    return true;
	}

	int32_t GetPhonebookIndex(UCharIterator iter, const char locale,
	UChar out, int32_t size, UBool isError)
	{
	if (size < MIN_OUTPUT_SIZE) {
	*isError = TRUE;
	return 0;
	}

	*isError = FALSE;
	out[0] = '\0';
	iter->move(iter, 0, UITER_ZERO);
	if (!iter->hasNext(iter)) { // Empty input string
	return 0;
	}
	UnicodeString ustr;
	bool prefixIsNonNumeric = false;
	bool prefixIsNumeric = false;
	while (iter->hasNext(iter)) {
	UChar32 ch = uiter_next32(iter);
	// Ignore standard phone number separators and identify any string
	// that otherwise starts with a number.
	if (!prefixIsNumeric && !prefixIsNonNumeric) {
	if (u_isdigit(ch)) {
	prefixIsNumeric = true;
	} else if (!u_isspace(ch) && ch != '+' && ch != '(' &&
	ch != ')' && ch != '.' && ch != '-' && ch != '#') {
	prefixIsNonNumeric = true;
	}
	}
	ustr.append(ch);
	}
	if (prefixIsNumeric) {
	out[0] = '#';
	return 1;
	}

	Mutex::Autolock autolock(gIndexMutex);
	if (!initIndexForLocale(locale)) {
	*isError = TRUE;
	return 0;
	}

	UErrorCode status = U_ZERO_ERROR;
	int32_t bucketIndex = (*gIndex)->getBucketIndex(ustr, status);
	if (U_FAILURE(status)) {
	*isError = TRUE;
	return 0;
	}

	int32_t len = gIndex->getLabel(bucketIndex, out, size);
	if (len < 0) {
	*isError = TRUE;
	return 0;
	}

	// For Japanese, label unclassified CJK ideographs with
	// Japanese word meaning "misc" or "other"
	if (gIndex->isJapanese() && len == 0 && is_CJ(ustr.char32At(0))) {
	out[0] = 0x4ED6;
	len = 1;
	}

	return len;
	}

	} // namespace android