2 * Copyright (C) 2010 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
17 package com.cyanogenmod.eleven.locale;
19 import android.provider.ContactsContract.FullNameStyle;
20 import android.provider.ContactsContract.PhoneticNameStyle;
21 import android.text.TextUtils;
22 import android.util.Log;
24 import com.cyanogenmod.eleven.locale.HanziToPinyin.Token;
26 import com.google.common.annotations.VisibleForTesting;
28 import java.lang.Character.UnicodeBlock;
29 import java.util.ArrayList;
30 import java.util.Collections;
31 import java.util.HashSet;
32 import java.util.Iterator;
33 import java.util.Locale;
36 import android.icu.text.AlphabeticIndex;
37 import android.icu.text.AlphabeticIndex.ImmutableIndex;
38 import android.icu.text.Transliterator;
/**
 * This utility class provides specialized handling for locale-specific
 * information: labels and name lookup keys.
 *
 * This class has been modified from ContactLocaleUtils.java to rip out the
 * Chinese/Japanese-specific alphabetic indexers. The MediaProvider sorts its
 * results with a Collator, which can disagree with the ICU bucket labels and
 * result in confusing behavior, so for now we simplify and batch up those
 * results until we later support our own internal databases.
 *
 * For example, given the songs "Able", "Xylophone" and "上" with the language
 * set to Simplified Chinese, the media provider hands them to us sorted in
 * that order, but the ICU library would label them "A", "X", "S". Unless we
 * write our own database or do our own sort, there is no good, easy solution.
 */
public class LocaleUtils {
    // Tag passed to android.util.Log for every message logged by this class.
    public static final String TAG = "MusicLocale";

    // Locales without a predefined java.util.Locale constant. Each one is
    // added as an extra label set when the alphabetic index is built.
    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Serbian and Ukrainian labels are complementary supersets of Russian
    public static final Locale LOCALE_SERBIAN = new Locale("sr");
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");
65 * This class is the default implementation and should be the base class
68 * sortKey: same as name
69 * nameLookupKeys: none
70 * labels: uses ICU AlphabeticIndex for labels and extends by labeling
71 * phone numbers "#". Eg English labels are: [A-Z], #, " "
73 private static class LocaleUtilsBase {
74 private static final String EMPTY_STRING = "";
75 private static final String NUMBER_STRING = "#";
77 protected final ImmutableIndex mAlphabeticIndex;
78 private final int mAlphabeticIndexBucketCount;
79 private final int mNumberBucketIndex;
80 private final boolean mEnableSecondaryLocalePinyin;
82 public LocaleUtilsBase(LocaleSet locales) {
83 // AlphabeticIndex.getBucketLabel() uses a binary search across
84 // the entire label set so care should be taken about growing this
85 // set too large. The following set determines for which locales
86 // we will show labels other than your primary locale. General rules
87 // of thumb for adding a locale: should be a supported locale; and
88 // should not be included if from a name it is not deterministic
89 // which way to label it (so eg Chinese cannot be added because
90 // the labeling of a Chinese character varies between Simplified,
91 // Traditional, and Japanese locales). Use English only for all
92 // Latin based alphabets. Ukrainian and Serbian are chosen for
93 // Cyrillic because their alphabets are complementary supersets
95 final Locale secondaryLocale = locales.getSecondaryLocale();
96 mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese();
97 AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
98 .setMaxLabelCount(300);
99 if (secondaryLocale != null) {
100 ai.addLabels(secondaryLocale);
102 mAlphabeticIndex = ai.addLabels(Locale.ENGLISH)
103 .addLabels(Locale.JAPANESE)
104 .addLabels(Locale.KOREAN)
105 .addLabels(LOCALE_THAI)
106 .addLabels(LOCALE_ARABIC)
107 .addLabels(LOCALE_HEBREW)
108 .addLabels(LOCALE_GREEK)
109 .addLabels(LOCALE_UKRAINIAN)
110 .addLabels(LOCALE_SERBIAN)
111 .buildImmutableIndex();
112 mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
113 mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
116 public String getSortKey(String name) {
121 * Returns the bucket index for the specified string. AlphabeticIndex
122 * sorts strings into buckets numbered in order from 0 to N, where the
123 * exact value of N depends on how many representative index labels are
124 * used in a particular locale. This routine adds one additional bucket
125 * for phone numbers. It attempts to detect phone numbers and shifts
126 * the bucket indexes returned by AlphabeticIndex in order to make room
127 * for the new # bucket, so the returned range becomes 0 to N+1.
129 public int getBucketIndex(String name) {
133 boolean prefixIsNumeric = false;
134 final int length = name.length();
136 while (offset < length) {
137 int codePoint = Character.codePointAt(name, offset);
138 // Ignore standard phone number separators and identify any
139 // string that otherwise starts with a number.
140 if (Character.isDigit(codePoint)) {
141 prefixIsNumeric = true;
143 } else if (!Character.isSpaceChar(codePoint) &&
144 codePoint != '+' && codePoint != '(' &&
145 codePoint != ')' && codePoint != '.' &&
146 codePoint != '-' && codePoint != '#') {
149 offset += Character.charCount(codePoint);
151 if (prefixIsNumeric) {
152 return mNumberBucketIndex;
156 * TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese
157 * as a secondary locale. Remove the following if that is added.
159 if (mEnableSecondaryLocalePinyin) {
160 name = HanziToPinyin.getInstance().transliterate(name);
162 final int bucket = mAlphabeticIndex.getBucketIndex(name);
166 if (bucket >= mNumberBucketIndex) {
173 * Returns the number of buckets in use (one more than AlphabeticIndex
174 * uses, because this class adds a bucket for phone numbers).
176 public int getBucketCount() {
177 return mAlphabeticIndexBucketCount + 1;
181 * Returns the label for the specified bucket index if a valid index,
182 * otherwise returns an empty string. '#' is returned for the phone
183 * number bucket; for all others, the AlphabeticIndex label is returned.
185 public String getBucketLabel(int bucketIndex) {
186 if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
188 } else if (bucketIndex == mNumberBucketIndex) {
189 return NUMBER_STRING;
190 } else if (bucketIndex > mNumberBucketIndex) {
193 return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
196 @SuppressWarnings("unused")
197 public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
201 public ArrayList<String> getLabels() {
202 final int bucketCount = getBucketCount();
203 final ArrayList<String> labels = new ArrayList<String>(bucketCount);
204 for(int i = 0; i < bucketCount; ++i) {
205 labels.add(getBucketLabel(i));
212 * Japanese specific locale overrides.
214 * sortKey: unchanged (same as name)
215 * nameLookupKeys: unchanged (none)
216 * labels: extends default labels by labeling unlabeled CJ characters
217 * with the Japanese character 他 ("misc"). Japanese labels are:
218 * あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
220 private static class JapaneseContactUtils extends LocaleUtilsBase {
221 // \u4ed6 is Japanese character 他 ("misc")
222 private static final String JAPANESE_MISC_LABEL = "\u4ed6";
223 private final int mMiscBucketIndex;
225 public JapaneseContactUtils(LocaleSet locales) {
227 // Determine which bucket AlphabeticIndex is lumping unclassified
228 // Japanese characters into by looking up the bucket index for
229 // a representative Kanji/CJK unified ideograph (\u65e5 is the
231 mMiscBucketIndex = super.getBucketIndex("\u65e5");
234 // Set of UnicodeBlocks for unified CJK (Chinese) characters and
235 // Japanese characters. This includes all code blocks that might
236 // contain a character used in Japanese (which is why unified CJK
237 // blocks are included but Korean Hangul and jamo are not).
238 private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
240 Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
241 set.add(UnicodeBlock.HIRAGANA);
242 set.add(UnicodeBlock.KATAKANA);
243 set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
244 set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
245 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
246 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
247 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
248 set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
249 set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
250 set.add(UnicodeBlock.CJK_COMPATIBILITY);
251 set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
252 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
253 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
254 CJ_BLOCKS = Collections.unmodifiableSet(set);
258 * Helper routine to identify unlabeled Chinese or Japanese characters
259 * to put in a 'misc' bucket.
261 * @return true if the specified Unicode code point is Chinese or
264 private static boolean isChineseOrJapanese(int codePoint) {
265 return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
269 * Returns the bucket index for the specified string. Adds an
270 * additional 'misc' bucket for Kanji characters to the base class set.
273 public int getBucketIndex(String name) {
274 final int bucketIndex = super.getBucketIndex(name);
275 if ((bucketIndex == mMiscBucketIndex &&
276 !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
277 bucketIndex > mMiscBucketIndex) {
278 return bucketIndex + 1;
284 * Returns the number of buckets in use (one more than the base class
285 * uses, because this class adds a bucket for Kanji).
288 public int getBucketCount() {
289 return super.getBucketCount() + 1;
293 * Returns the label for the specified bucket index if a valid index,
294 * otherwise returns an empty string. '他' is returned for unclassified
295 * Kanji; for all others, the label determined by the base class is
299 public String getBucketLabel(int bucketIndex) {
300 if (bucketIndex == mMiscBucketIndex) {
301 return JAPANESE_MISC_LABEL;
302 } else if (bucketIndex > mMiscBucketIndex) {
305 return super.getBucketLabel(bucketIndex);
309 public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
310 // Hiragana and Katakana will be positively identified as Japanese.
311 if (nameStyle == PhoneticNameStyle.JAPANESE) {
312 return getRomajiNameLookupKeys(name);
317 private static boolean mInitializedTransliterator;
318 private static Transliterator mJapaneseTransliterator;
320 private static Transliterator getJapaneseTransliterator() {
321 synchronized(JapaneseContactUtils.class) {
322 if (!mInitializedTransliterator) {
323 mInitializedTransliterator = true;
324 Transliterator t = null;
326 t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
328 } catch (RuntimeException e) {
329 Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
332 mJapaneseTransliterator = t;
334 return mJapaneseTransliterator;
338 public static Iterator<String> getRomajiNameLookupKeys(String name) {
339 final Transliterator t = getJapaneseTransliterator();
343 final String romajiName = t.transliterate(name);
344 if (TextUtils.isEmpty(romajiName) ||
345 TextUtils.equals(name, romajiName)) {
348 final HashSet<String> keys = new HashSet<String>();
349 keys.add(romajiName);
350 return keys.iterator();
355 * Simplified Chinese specific locale overrides. Uses ICU Transliterator
356 * for generating pinyin transliteration.
358 * sortKey: unchanged (same as name)
359 * nameLookupKeys: adds additional name lookup keys
360 * - Chinese character's pinyin and pinyin's initial character.
361 * - Latin word and initial character.
363 * Simplified Chinese labels are the same as English: [A-Z], #, " "
365 private static class SimplifiedChineseContactUtils
366 extends LocaleUtilsBase {
367 public SimplifiedChineseContactUtils(LocaleSet locales) {
372 public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
373 if (nameStyle != FullNameStyle.JAPANESE &&
374 nameStyle != FullNameStyle.KOREAN) {
375 return getPinyinNameLookupKeys(name);
380 public static Iterator<String> getPinyinNameLookupKeys(String name) {
381 // TODO : Reduce the object allocation.
382 HashSet<String> keys = new HashSet<String>();
383 ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
384 final int tokenCount = tokens.size();
385 final StringBuilder keyPinyin = new StringBuilder();
386 final StringBuilder keyInitial = new StringBuilder();
387 // There is no space among the Chinese Characters, the variant name
388 // lookup key wouldn't work for Chinese. The keyOriginal is used to
389 // build the lookup keys for itself.
390 final StringBuilder keyOriginal = new StringBuilder();
391 for (int i = tokenCount - 1; i >= 0; i--) {
392 final Token token = tokens.get(i);
393 if (Token.UNKNOWN == token.type) {
396 if (Token.PINYIN == token.type) {
397 keyPinyin.insert(0, token.target);
398 keyInitial.insert(0, token.target.charAt(0));
399 } else if (Token.LATIN == token.type) {
400 // Avoid adding space at the end of String.
401 if (keyPinyin.length() > 0) {
402 keyPinyin.insert(0, ' ');
404 if (keyOriginal.length() > 0) {
405 keyOriginal.insert(0, ' ');
407 keyPinyin.insert(0, token.source);
408 keyInitial.insert(0, token.source.charAt(0));
410 keyOriginal.insert(0, token.source);
411 keys.add(keyOriginal.toString());
412 keys.add(keyPinyin.toString());
413 keys.add(keyInitial.toString());
415 return keys.iterator();
419 private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
420 private static LocaleUtils sSingleton;
422 private final LocaleSet mLocales;
423 private final LocaleUtilsBase mUtils;
/**
 * Creates the helper for the given locales and picks the matching
 * implementation: Japanese, Simplified Chinese, or the default base class.
 *
 * @param locales locales to build labels for; {@code null} selects
 *        {@code LocaleSet.getDefault()}
 */
private LocaleUtils(LocaleSet locales) {
    mLocales = (locales == null) ? LocaleSet.getDefault() : locales;
    if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
        mUtils = new JapaneseContactUtils(mLocales);
    } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
        mUtils = new SimplifiedChineseContactUtils(mLocales);
    } else {
        mUtils = new LocaleUtilsBase(mLocales);
    }
    Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
            + getLabels().toString());
}
442 public boolean isLocale(LocaleSet locales) {
443 return mLocales.equals(locales);
446 public static synchronized LocaleUtils getInstance() {
447 if (sSingleton == null) {
448 sSingleton = new LocaleUtils(LocaleSet.getDefault());
454 public static synchronized void setLocale(Locale locale) {
455 setLocales(new LocaleSet(locale));
458 public static synchronized void setLocales(LocaleSet locales) {
459 if (sSingleton == null || !sSingleton.isLocale(locales)) {
460 sSingleton = new LocaleUtils(locales);
464 public String getSortKey(String name, int nameStyle) {
465 return mUtils.getSortKey(name);
468 public int getBucketIndex(String name) {
469 return mUtils.getBucketIndex(name);
472 public int getBucketCount() {
473 return mUtils.getBucketCount();
476 public String getBucketLabel(int bucketIndex) {
477 return mUtils.getBucketLabel(bucketIndex);
480 public String getLabel(String name) {
481 return getBucketLabel(getBucketIndex(name));
484 public ArrayList<String> getLabels() {
485 return mUtils.getLabels();