/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package com.cyanogenmod.eleven.locale;
19 import android.text.TextUtils;
20 import android.util.Log;
22 import java.util.ArrayList;
24 import android.icu.text.Transliterator;
27 * An object to convert Chinese character to its corresponding pinyin string.
28 * For characters with multiple possible pinyin string, only one is selected
29 * according to ICU Transliterator class. Polyphone is not supported in this
32 public class HanziToPinyin {
33 private static final String TAG = "HanziToPinyin";
35 private static HanziToPinyin sInstance;
36 private Transliterator mPinyinTransliterator;
37 private Transliterator mAsciiTransliterator;
39 public static class Token {
41 * Separator between target string for each source char
43 public static final String SEPARATOR = " ";
45 public static final int LATIN = 1;
46 public static final int PINYIN = 2;
47 public static final int UNKNOWN = 3;
52 public Token(int type, String source, String target) {
59 * Type of this token, ASCII, PINYIN or UNKNOWN.
63 * Original string before translation.
67 * Translated string of source. For Han, target is corresponding Pinyin. Otherwise target is
68 * original string in source.
73 private HanziToPinyin() {
75 mPinyinTransliterator = Transliterator.getInstance("Han-Latin/Names; Latin-Ascii; Any-Upper");
76 mAsciiTransliterator = Transliterator.getInstance("Latin-Ascii");
77 } catch (RuntimeException e) {
78 Log.w(TAG, "Han-Latin/Names transliterator data is missing,"
79 + " HanziToPinyin is disabled");
83 public boolean hasChineseTransliterator() {
84 return mPinyinTransliterator != null;
87 public static HanziToPinyin getInstance() {
88 synchronized (HanziToPinyin.class) {
89 if (sInstance == null) {
90 sInstance = new HanziToPinyin();
96 private void tokenize(char character, Token token) {
97 token.source = Character.toString(character);
100 if (character < 128) {
101 token.type = Token.LATIN;
102 token.target = token.source;
106 // Extended Latin. Transcode these to ASCII equivalents
107 if (character < 0x250 || (0x1e00 <= character && character < 0x1eff)) {
108 token.type = Token.LATIN;
109 token.target = mAsciiTransliterator == null ? token.source :
110 mAsciiTransliterator.transliterate(token.source);
114 token.type = Token.PINYIN;
115 token.target = mPinyinTransliterator.transliterate(token.source);
116 if (TextUtils.isEmpty(token.target) ||
117 TextUtils.equals(token.source, token.target)) {
118 token.type = Token.UNKNOWN;
119 token.target = token.source;
123 public String transliterate(final String input) {
124 if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
127 return mPinyinTransliterator.transliterate(input);
131 * Convert the input to a array of tokens. The sequence of ASCII or Unknown characters without
132 * space will be put into a Token, One Hanzi character which has pinyin will be treated as a
133 * Token. If there is no Chinese transliterator, the empty token array is returned.
135 public ArrayList<Token> getTokens(final String input) {
136 ArrayList<Token> tokens = new ArrayList<Token>();
137 if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
138 // return empty tokens.
142 final int inputLength = input.length();
143 final StringBuilder sb = new StringBuilder();
144 int tokenType = Token.LATIN;
145 Token token = new Token();
147 // Go through the input, create a new token when
148 // a. Token type changed
149 // b. Get the Pinyin of current charater.
150 // c. current character is space.
151 for (int i = 0; i < inputLength; i++) {
152 final char character = input.charAt(i);
153 if (Character.isSpaceChar(character)) {
154 if (sb.length() > 0) {
155 addToken(sb, tokens, tokenType);
158 tokenize(character, token);
159 if (token.type == Token.PINYIN) {
160 if (sb.length() > 0) {
161 addToken(sb, tokens, tokenType);
166 if (tokenType != token.type && sb.length() > 0) {
167 addToken(sb, tokens, tokenType);
169 sb.append(token.target);
171 tokenType = token.type;
174 if (sb.length() > 0) {
175 addToken(sb, tokens, tokenType);
180 private void addToken(
181 final StringBuilder sb, final ArrayList<Token> tokens, final int tokenType) {
182 String str = sb.toString();
183 tokens.add(new Token(tokenType, str, str));