src/com/cyanogenmod/eleven/locale/LocaleUtils.java

   1 /*
   2  * Copyright (C) 2010 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License
  15  */
  16
  17 package com.cyanogenmod.eleven.locale;
  18
  19 import android.provider.ContactsContract.FullNameStyle;
  20 import android.provider.ContactsContract.PhoneticNameStyle;
  21 import android.text.TextUtils;
  22 import android.util.Log;
  23
  24 import com.cyanogenmod.eleven.locale.HanziToPinyin.Token;
  25
  26 import com.google.common.annotations.VisibleForTesting;
  27
  28 import java.lang.Character.UnicodeBlock;
  29 import java.util.ArrayList;
  30 import java.util.Collections;
  31 import java.util.HashSet;
  32 import java.util.Iterator;
  33 import java.util.Locale;
  34 import java.util.Set;
  35
  36 import android.icu.text.AlphabeticIndex;
  37 import android.icu.text.AlphabeticIndex.ImmutableIndex;
  38 import android.icu.text.Transliterator;
  39
/**
 * This utility class provides specialized handling for locale-specific
 * information: labels and name lookup keys.
 *
 * This class has been adapted from ContactLocaleUtils.java. For now, the
 * Chinese/Japanese-specific alphabetic indexers have been ripped out because
 * the MediaProvider sorts its results with a Collator, and mixing that order
 * with those indexers can result in confusing behavior. Until we support our
 * own internal databases, we simplify and batch up those results instead.
 *
 * For example, given the songs "Able", "Xylophone" and "上" with Simplified
 * Chinese as the language, the MediaProvider would hand them to us sorted in
 * that order, but the ICU library would label them "A", "X", "S". Unless we
 * write our own database or do our own sort, there is no good, easy solution.
 */
  53 public class LocaleUtils {
  54     public static final String TAG = "MusicLocale";
  55
  56     public static final Locale LOCALE_ARABIC = new Locale("ar");
  57     public static final Locale LOCALE_GREEK = new Locale("el");
  58     public static final Locale LOCALE_HEBREW = new Locale("he");
  59     // Serbian and Ukrainian labels are complementary supersets of Russian
  60     public static final Locale LOCALE_SERBIAN = new Locale("sr");
  61     public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
  62     public static final Locale LOCALE_THAI = new Locale("th");
  63
  64     /**
  65      * This class is the default implementation and should be the base class
  66      * for other locales.
  67      *
  68      * sortKey: same as name
  69      * nameLookupKeys: none
  70      * labels: uses ICU AlphabeticIndex for labels and extends by labeling
  71      *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
  72      */
  73     private static class LocaleUtilsBase {
  74         private static final String EMPTY_STRING = "";
  75         private static final String NUMBER_STRING = "#";
  76
  77         protected final ImmutableIndex mAlphabeticIndex;
  78         private final int mAlphabeticIndexBucketCount;
  79         private final int mNumberBucketIndex;
  80         private final boolean mEnableSecondaryLocalePinyin;
  81
  82         public LocaleUtilsBase(LocaleSet locales) {
  83             // AlphabeticIndex.getBucketLabel() uses a binary search across
  84             // the entire label set so care should be taken about growing this
  85             // set too large. The following set determines for which locales
  86             // we will show labels other than your primary locale. General rules
  87             // of thumb for adding a locale: should be a supported locale; and
  88             // should not be included if from a name it is not deterministic
  89             // which way to label it (so eg Chinese cannot be added because
  90             // the labeling of a Chinese character varies between Simplified,
  91             // Traditional, and Japanese locales). Use English only for all
  92             // Latin based alphabets. Ukrainian and Serbian are chosen for
  93             // Cyrillic because their alphabets are complementary supersets
  94             // of Russian.
  95             final Locale secondaryLocale = locales.getSecondaryLocale();
  96             mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese();
  97             AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
  98                 .setMaxLabelCount(300);
  99             if (secondaryLocale != null) {
 100                 ai.addLabels(secondaryLocale);
 101             }
 102             mAlphabeticIndex = ai.addLabels(Locale.ENGLISH)
 103                 .addLabels(Locale.JAPANESE)
 104                 .addLabels(Locale.KOREAN)
 105                 .addLabels(LOCALE_THAI)
 106                 .addLabels(LOCALE_ARABIC)
 107                 .addLabels(LOCALE_HEBREW)
 108                 .addLabels(LOCALE_GREEK)
 109                 .addLabels(LOCALE_UKRAINIAN)
 110                 .addLabels(LOCALE_SERBIAN)
 111                 .buildImmutableIndex();
 112             mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
 113             mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
 114         }
 115
 116         public String getSortKey(String name) {
 117             return name;
 118         }
 119
 120         /**
 121          * Returns the bucket index for the specified string. AlphabeticIndex
 122          * sorts strings into buckets numbered in order from 0 to N, where the
 123          * exact value of N depends on how many representative index labels are
 124          * used in a particular locale. This routine adds one additional bucket
 125          * for phone numbers. It attempts to detect phone numbers and shifts
 126          * the bucket indexes returned by AlphabeticIndex in order to make room
 127          * for the new # bucket, so the returned range becomes 0 to N+1.
 128          */
 129         public int getBucketIndex(String name) {
 130             if (name == null) {
 131                 return -1;
 132             }
 133             boolean prefixIsNumeric = false;
 134             final int length = name.length();
 135             int offset = 0;
 136             while (offset < length) {
 137                 int codePoint = Character.codePointAt(name, offset);
 138                 // Ignore standard phone number separators and identify any
 139                 // string that otherwise starts with a number.
 140                 if (Character.isDigit(codePoint)) {
 141                     prefixIsNumeric = true;
 142                     break;
 143                 } else if (!Character.isSpaceChar(codePoint) &&
 144                            codePoint != '+' && codePoint != '(' &&
 145                            codePoint != ')' && codePoint != '.' &&
 146                            codePoint != '-' && codePoint != '#') {
 147                     break;
 148                 }
 149                 offset += Character.charCount(codePoint);
 150             }
 151             if (prefixIsNumeric) {
 152                 return mNumberBucketIndex;
 153             }
 154
 155             /**
 156              * TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese
 157              * as a secondary locale. Remove the following if that is added.
 158              */
 159             if (mEnableSecondaryLocalePinyin) {
 160                 name = HanziToPinyin.getInstance().transliterate(name);
 161             }
 162             final int bucket = mAlphabeticIndex.getBucketIndex(name);
 163             if (bucket < 0) {
 164                 return -1;
 165             }
 166             if (bucket >= mNumberBucketIndex) {
 167                 return bucket + 1;
 168             }
 169             return bucket;
 170         }
 171
 172         /**
 173          * Returns the number of buckets in use (one more than AlphabeticIndex
 174          * uses, because this class adds a bucket for phone numbers).
 175          */
 176         public int getBucketCount() {
 177             return mAlphabeticIndexBucketCount + 1;
 178         }
 179
 180         /**
 181          * Returns the label for the specified bucket index if a valid index,
 182          * otherwise returns an empty string. '#' is returned for the phone
 183          * number bucket; for all others, the AlphabeticIndex label is returned.
 184          */
 185         public String getBucketLabel(int bucketIndex) {
 186             if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
 187                 return EMPTY_STRING;
 188             } else if (bucketIndex == mNumberBucketIndex) {
 189                 return NUMBER_STRING;
 190             } else if (bucketIndex > mNumberBucketIndex) {
 191                 --bucketIndex;
 192             }
 193             return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
 194         }
 195
 196         @SuppressWarnings("unused")
 197         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
 198             return null;
 199         }
 200
 201         public ArrayList<String> getLabels() {
 202             final int bucketCount = getBucketCount();
 203             final ArrayList<String> labels = new ArrayList<String>(bucketCount);
 204             for(int i = 0; i < bucketCount; ++i) {
 205                 labels.add(getBucketLabel(i));
 206             }
 207             return labels;
 208         }
 209     }
 210
 211     /**
 212      * Japanese specific locale overrides.
 213      *
 214      * sortKey: unchanged (same as name)
 215      * nameLookupKeys: unchanged (none)
 216      * labels: extends default labels by labeling unlabeled CJ characters
 217      *     with the Japanese character 他 ("misc"). Japanese labels are:
 218      *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
 219      */
 220     private static class JapaneseContactUtils extends LocaleUtilsBase {
 221         // \u4ed6 is Japanese character 他 ("misc")
 222         private static final String JAPANESE_MISC_LABEL = "\u4ed6";
 223         private final int mMiscBucketIndex;
 224
 225         public JapaneseContactUtils(LocaleSet locales) {
 226             super(locales);
 227             // Determine which bucket AlphabeticIndex is lumping unclassified
 228             // Japanese characters into by looking up the bucket index for
 229             // a representative Kanji/CJK unified ideograph (\u65e5 is the
 230             // character '日').
 231             mMiscBucketIndex = super.getBucketIndex("\u65e5");
 232         }
 233
 234         // Set of UnicodeBlocks for unified CJK (Chinese) characters and
 235         // Japanese characters. This includes all code blocks that might
 236         // contain a character used in Japanese (which is why unified CJK
 237         // blocks are included but Korean Hangul and jamo are not).
 238         private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
 239         static {
 240             Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
 241             set.add(UnicodeBlock.HIRAGANA);
 242             set.add(UnicodeBlock.KATAKANA);
 243             set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
 244             set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
 245             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
 246             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
 247             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
 248             set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
 249             set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
 250             set.add(UnicodeBlock.CJK_COMPATIBILITY);
 251             set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
 252             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
 253             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
 254             CJ_BLOCKS = Collections.unmodifiableSet(set);
 255         }
 256
 257         /**
 258          * Helper routine to identify unlabeled Chinese or Japanese characters
 259          * to put in a 'misc' bucket.
 260          *
 261          * @return true if the specified Unicode code point is Chinese or
 262          *              Japanese
 263          */
 264         private static boolean isChineseOrJapanese(int codePoint) {
 265             return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
 266         }
 267
 268         /**
 269          * Returns the bucket index for the specified string. Adds an
 270          * additional 'misc' bucket for Kanji characters to the base class set.
 271          */
 272         @Override
 273         public int getBucketIndex(String name) {
 274             final int bucketIndex = super.getBucketIndex(name);
 275             if ((bucketIndex == mMiscBucketIndex &&
 276                  !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
 277                 bucketIndex > mMiscBucketIndex) {
 278                 return bucketIndex + 1;
 279             }
 280             return bucketIndex;
 281         }
 282
 283         /**
 284          * Returns the number of buckets in use (one more than the base class
 285          * uses, because this class adds a bucket for Kanji).
 286          */
 287         @Override
 288         public int getBucketCount() {
 289             return super.getBucketCount() + 1;
 290         }
 291
 292         /**
 293          * Returns the label for the specified bucket index if a valid index,
 294          * otherwise returns an empty string. '他' is returned for unclassified
 295          * Kanji; for all others, the label determined by the base class is
 296          * returned.
 297          */
 298         @Override
 299         public String getBucketLabel(int bucketIndex) {
 300             if (bucketIndex == mMiscBucketIndex) {
 301                 return JAPANESE_MISC_LABEL;
 302             } else if (bucketIndex > mMiscBucketIndex) {
 303                 --bucketIndex;
 304             }
 305             return super.getBucketLabel(bucketIndex);
 306         }
 307
 308         @Override
 309         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
 310             // Hiragana and Katakana will be positively identified as Japanese.
 311             if (nameStyle == PhoneticNameStyle.JAPANESE) {
 312                 return getRomajiNameLookupKeys(name);
 313             }
 314             return null;
 315         }
 316
 317         private static boolean mInitializedTransliterator;
 318         private static Transliterator mJapaneseTransliterator;
 319
 320         private static Transliterator getJapaneseTransliterator() {
 321             synchronized(JapaneseContactUtils.class) {
 322                 if (!mInitializedTransliterator) {
 323                     mInitializedTransliterator = true;
 324                     Transliterator t = null;
 325                     try {
 326                         t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
 327                                 + " Latin-Ascii");
 328                     } catch (RuntimeException e) {
 329                         Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
 330                                 + " is missing");
 331                     }
 332                     mJapaneseTransliterator = t;
 333                 }
 334                 return mJapaneseTransliterator;
 335             }
 336         }
 337
 338         public static Iterator<String> getRomajiNameLookupKeys(String name) {
 339             final Transliterator t = getJapaneseTransliterator();
 340             if (t == null) {
 341                 return null;
 342             }
 343             final String romajiName = t.transliterate(name);
 344             if (TextUtils.isEmpty(romajiName) ||
 345                     TextUtils.equals(name, romajiName)) {
 346                 return null;
 347             }
 348             final HashSet<String> keys = new HashSet<String>();
 349             keys.add(romajiName);
 350             return keys.iterator();
 351         }
 352     }
 353
 354     /**
 355      * Simplified Chinese specific locale overrides. Uses ICU Transliterator
 356      * for generating pinyin transliteration.
 357      *
 358      * sortKey: unchanged (same as name)
 359      * nameLookupKeys: adds additional name lookup keys
 360      *     - Chinese character's pinyin and pinyin's initial character.
 361      *     - Latin word and initial character.
 362      * labels: unchanged
 363      *     Simplified Chinese labels are the same as English: [A-Z], #, " "
 364      */
 365     private static class SimplifiedChineseContactUtils
 366         extends LocaleUtilsBase {
 367         public SimplifiedChineseContactUtils(LocaleSet locales) {
 368             super(locales);
 369         }
 370
 371         @Override
 372         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
 373             if (nameStyle != FullNameStyle.JAPANESE &&
 374                     nameStyle != FullNameStyle.KOREAN) {
 375                 return getPinyinNameLookupKeys(name);
 376             }
 377             return null;
 378         }
 379
 380         public static Iterator<String> getPinyinNameLookupKeys(String name) {
 381             // TODO : Reduce the object allocation.
 382             HashSet<String> keys = new HashSet<String>();
 383             ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
 384             final int tokenCount = tokens.size();
 385             final StringBuilder keyPinyin = new StringBuilder();
 386             final StringBuilder keyInitial = new StringBuilder();
 387             // There is no space among the Chinese Characters, the variant name
 388             // lookup key wouldn't work for Chinese. The keyOriginal is used to
 389             // build the lookup keys for itself.
 390             final StringBuilder keyOriginal = new StringBuilder();
 391             for (int i = tokenCount - 1; i >= 0; i--) {
 392                 final Token token = tokens.get(i);
 393                 if (Token.UNKNOWN == token.type) {
 394                     continue;
 395                 }
 396                 if (Token.PINYIN == token.type) {
 397                     keyPinyin.insert(0, token.target);
 398                     keyInitial.insert(0, token.target.charAt(0));
 399                 } else if (Token.LATIN == token.type) {
 400                     // Avoid adding space at the end of String.
 401                     if (keyPinyin.length() > 0) {
 402                         keyPinyin.insert(0, ' ');
 403                     }
 404                     if (keyOriginal.length() > 0) {
 405                         keyOriginal.insert(0, ' ');
 406                     }
 407                     keyPinyin.insert(0, token.source);
 408                     keyInitial.insert(0, token.source.charAt(0));
 409                 }
 410                 keyOriginal.insert(0, token.source);
 411                 keys.add(keyOriginal.toString());
 412                 keys.add(keyPinyin.toString());
 413                 keys.add(keyInitial.toString());
 414             }
 415             return keys.iterator();
 416         }
 417     }
 418
 419     private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
 420     private static LocaleUtils sSingleton;
 421
 422     private final LocaleSet mLocales;
 423     private final LocaleUtilsBase mUtils;
 424
 425     private LocaleUtils(LocaleSet locales) {
 426         if (locales == null) {
 427             mLocales = LocaleSet.getDefault();
 428         } else {
 429             mLocales = locales;
 430         }
 431         if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
 432             mUtils = new JapaneseContactUtils(mLocales);
 433         } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
 434             mUtils = new SimplifiedChineseContactUtils(mLocales);
 435         } else {
 436             mUtils = new LocaleUtilsBase(mLocales);
 437         }
 438         Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
 439                 + getLabels().toString());
 440     }
 441
 442     public boolean isLocale(LocaleSet locales) {
 443         return mLocales.equals(locales);
 444     }
 445
 446     public static synchronized LocaleUtils getInstance() {
 447         if (sSingleton == null) {
 448             sSingleton = new LocaleUtils(LocaleSet.getDefault());
 449         }
 450         return sSingleton;
 451     }
 452
 453     @VisibleForTesting
 454     public static synchronized void setLocale(Locale locale) {
 455         setLocales(new LocaleSet(locale));
 456     }
 457
 458     public static synchronized void setLocales(LocaleSet locales) {
 459         if (sSingleton == null || !sSingleton.isLocale(locales)) {
 460             sSingleton = new LocaleUtils(locales);
 461         }
 462     }
 463
 464     public String getSortKey(String name, int nameStyle) {
 465         return mUtils.getSortKey(name);
 466     }
 467
 468     public int getBucketIndex(String name) {
 469         return mUtils.getBucketIndex(name);
 470     }
 471
 472     public int getBucketCount() {
 473         return mUtils.getBucketCount();
 474     }
 475
 476     public String getBucketLabel(int bucketIndex) {
 477         return mUtils.getBucketLabel(bucketIndex);
 478     }
 479
 480     public String getLabel(String name) {
 481         return getBucketLabel(getBucketIndex(name));
 482     }
 483
 484     public ArrayList<String> getLabels() {
 485         return mUtils.getLabels();
 486     }
 487 }