| /* |
| * Copyright (C) 2011 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.android.providers.contacts; |
| |
| import android.text.TextUtils; |
| import android.util.Log; |
| |
| import java.util.ArrayList; |
| import java.util.Locale; |
| |
| import libcore.icu.Transliterator; |
| |
| /** |
| * An object to convert Chinese character to its corresponding pinyin string. |
| * For characters with multiple possible pinyin string, only one is selected |
| * according to ICU Transliterator class. Polyphone is not supported in this |
| * implementation. |
| */ |
| public class HanziToPinyin { |
| private static final String TAG = "HanziToPinyin"; |
| |
| private static HanziToPinyin sInstance; |
| private final Transliterator mPinyinTransliterator; |
| |
| public static class Token { |
| /** |
| * Separator between target string for each source char |
| */ |
| public static final String SEPARATOR = " "; |
| |
| public static final int LATIN = 1; |
| public static final int PINYIN = 2; |
| public static final int UNKNOWN = 3; |
| |
| public Token() { |
| } |
| |
| public Token(int type, String source, String target) { |
| this.type = type; |
| this.source = source; |
| this.target = target; |
| } |
| |
| /** |
| * Type of this token, ASCII, PINYIN or UNKNOWN. |
| */ |
| public int type; |
| /** |
| * Original string before translation. |
| */ |
| public String source; |
| /** |
| * Translated string of source. For Han, target is corresponding Pinyin. Otherwise target is |
| * original string in source. |
| */ |
| public String target; |
| } |
| |
| private HanziToPinyin() { |
| Transliterator t = null; |
| try { |
| t = new Transliterator("Han-Latin/Names; Latin-Ascii; Any-Upper"); |
| } catch (RuntimeException e) { |
| Log.w(TAG, "Han-Latin/Names transliterator data is missing," |
| + " HanziToPinyin is disabled"); |
| } |
| mPinyinTransliterator = t; |
| } |
| |
| public boolean hasChineseTransliterator() { |
| return mPinyinTransliterator != null; |
| } |
| |
| public static HanziToPinyin getInstance() { |
| synchronized (HanziToPinyin.class) { |
| if (sInstance == null) { |
| sInstance = new HanziToPinyin(); |
| } |
| return sInstance; |
| } |
| } |
| |
| private Token getToken(char character) { |
| Token token = new Token(); |
| token.source = Character.toString(character); |
| |
| if (character < 256) { |
| token.type = Token.LATIN; |
| token.target = token.source; |
| return token; |
| } |
| |
| token.type = Token.PINYIN; |
| token.target = mPinyinTransliterator.transliterate(token.source); |
| if (TextUtils.isEmpty(token.target) || |
| TextUtils.equals(token.source, token.target)) { |
| token.type = Token.UNKNOWN; |
| token.target = token.source; |
| } |
| return token; |
| } |
| |
| /** |
| * Convert the input to a array of tokens. The sequence of ASCII or Unknown characters without |
| * space will be put into a Token, One Hanzi character which has pinyin will be treated as a |
| * Token. If there is no Chinese transliterator, the empty token array is returned. |
| */ |
| public ArrayList<Token> get(final String input) { |
| ArrayList<Token> tokens = new ArrayList<Token>(); |
| if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) { |
| // return empty tokens. |
| return tokens; |
| } |
| final int inputLength = input.length(); |
| final StringBuilder sb = new StringBuilder(); |
| int tokenType = Token.LATIN; |
| // Go through the input, create a new token when |
| // a. Token type changed |
| // b. Get the Pinyin of current charater. |
| // c. current character is space. |
| for (int i = 0; i < inputLength; i++) { |
| final char character = input.charAt(i); |
| if (character == ' ') { |
| if (sb.length() > 0) { |
| addToken(sb, tokens, tokenType); |
| } |
| } else if (character < 256) { |
| if (tokenType != Token.LATIN && sb.length() > 0) { |
| addToken(sb, tokens, tokenType); |
| } |
| tokenType = Token.LATIN; |
| sb.append(character); |
| } else { |
| Token t = getToken(character); |
| if (t.type == Token.PINYIN) { |
| if (sb.length() > 0) { |
| addToken(sb, tokens, tokenType); |
| } |
| tokens.add(t); |
| tokenType = Token.PINYIN; |
| } else { |
| if (tokenType != t.type && sb.length() > 0) { |
| addToken(sb, tokens, tokenType); |
| } |
| tokenType = t.type; |
| sb.append(character); |
| } |
| } |
| } |
| if (sb.length() > 0) { |
| addToken(sb, tokens, tokenType); |
| } |
| return tokens; |
| } |
| |
| private void addToken( |
| final StringBuilder sb, final ArrayList<Token> tokens, final int tokenType) { |
| String str = sb.toString(); |
| tokens.add(new Token(tokenType, str, str)); |
| sb.setLength(0); |
| } |
| } |