blob: 161bb58cf0a9fa85dd1ab6ee8c85fa24b029b77a [file] [log] [blame]
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.providers.contacts;
import android.text.TextUtils;
import android.util.Log;
import java.util.ArrayList;
import java.util.Locale;
import libcore.icu.Transliterator;
/**
* An object to convert Chinese character to its corresponding pinyin string.
* For characters with multiple possible pinyin string, only one is selected
* according to ICU Transliterator class. Polyphone is not supported in this
* implementation.
*/
public class HanziToPinyin {
private static final String TAG = "HanziToPinyin";
private static HanziToPinyin sInstance;
private final Transliterator mPinyinTransliterator;
public static class Token {
/**
* Separator between target string for each source char
*/
public static final String SEPARATOR = " ";
public static final int LATIN = 1;
public static final int PINYIN = 2;
public static final int UNKNOWN = 3;
public Token() {
}
public Token(int type, String source, String target) {
this.type = type;
this.source = source;
this.target = target;
}
/**
* Type of this token, ASCII, PINYIN or UNKNOWN.
*/
public int type;
/**
* Original string before translation.
*/
public String source;
/**
* Translated string of source. For Han, target is corresponding Pinyin. Otherwise target is
* original string in source.
*/
public String target;
}
private HanziToPinyin() {
Transliterator t = null;
try {
t = new Transliterator("Han-Latin/Names; Latin-Ascii; Any-Upper");
} catch (RuntimeException e) {
Log.w(TAG, "Han-Latin/Names transliterator data is missing,"
+ " HanziToPinyin is disabled");
}
mPinyinTransliterator = t;
}
public boolean hasChineseTransliterator() {
return mPinyinTransliterator != null;
}
public static HanziToPinyin getInstance() {
synchronized (HanziToPinyin.class) {
if (sInstance == null) {
sInstance = new HanziToPinyin();
}
return sInstance;
}
}
private Token getToken(char character) {
Token token = new Token();
token.source = Character.toString(character);
if (character < 256) {
token.type = Token.LATIN;
token.target = token.source;
return token;
}
token.type = Token.PINYIN;
token.target = mPinyinTransliterator.transliterate(token.source);
if (TextUtils.isEmpty(token.target) ||
TextUtils.equals(token.source, token.target)) {
token.type = Token.UNKNOWN;
token.target = token.source;
}
return token;
}
/**
* Convert the input to a array of tokens. The sequence of ASCII or Unknown characters without
* space will be put into a Token, One Hanzi character which has pinyin will be treated as a
* Token. If there is no Chinese transliterator, the empty token array is returned.
*/
public ArrayList<Token> get(final String input) {
ArrayList<Token> tokens = new ArrayList<Token>();
if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
// return empty tokens.
return tokens;
}
final int inputLength = input.length();
final StringBuilder sb = new StringBuilder();
int tokenType = Token.LATIN;
// Go through the input, create a new token when
// a. Token type changed
// b. Get the Pinyin of current charater.
// c. current character is space.
for (int i = 0; i < inputLength; i++) {
final char character = input.charAt(i);
if (character == ' ') {
if (sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
} else if (character < 256) {
if (tokenType != Token.LATIN && sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
tokenType = Token.LATIN;
sb.append(character);
} else {
Token t = getToken(character);
if (t.type == Token.PINYIN) {
if (sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
tokens.add(t);
tokenType = Token.PINYIN;
} else {
if (tokenType != t.type && sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
tokenType = t.type;
sb.append(character);
}
}
}
if (sb.length() > 0) {
addToken(sb, tokens, tokenType);
}
return tokens;
}
private void addToken(
final StringBuilder sb, final ArrayList<Token> tokens, final int tokenType) {
String str = sb.toString();
tokens.add(new Token(tokenType, str, str));
sb.setLength(0);
}
}