src/org/lineageos/eleven/locale/LocaleUtils.java

   1 /*
   2  * Copyright (C) 2010 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License
  15  */
  16
  17 package org.lineageos.eleven.locale;
  18
  19 import android.provider.ContactsContract.FullNameStyle;
  20 import android.provider.ContactsContract.PhoneticNameStyle;
  21 import android.support.annotation.VisibleForTesting;
  22 import android.text.TextUtils;
  23 import android.util.Log;
  24
  25 import org.lineageos.eleven.locale.HanziToPinyin.Token;
  26
  27 import java.lang.Character.UnicodeBlock;
  28 import java.util.ArrayList;
  29 import java.util.Collections;
  30 import java.util.HashSet;
  31 import java.util.Iterator;
  32 import java.util.Locale;
  33 import java.util.Set;
  34
  35 import android.icu.text.AlphabeticIndex;
  36 import android.icu.text.AlphabeticIndex.ImmutableIndex;
  37 import android.icu.text.Transliterator;
  38
  39 /**
  40  * This utility class provides specialized handling for locale specific
  41  * information: labels, name lookup keys.
  42  *
  43  * This class has been modified from ContactLocaleUtils.java for now to rip out
  44  * Chinese/Japanese specific Alphabetic Indexers because the MediaProvider's sort
  45  * is using a Collator sort which can result in confusing behavior, so for now we will
  46  * simplify and batch up those results until we later support our own internal databases
  47  * An example of what This is, if we have songs "Able", "Xylophone" and "上" in
  48  * simplified chinese language The media provider would give it to us in that order sorted,
  49  * but the ICU lib would return "A", "X", "S".  Unless we write our own db or do our own sort
  50  * there is no good easy solution
  51  */
  52 public class LocaleUtils {
    // Tag passed to android.util.Log for every message emitted by this class.
    public static final String TAG = "MusicLocale";

    // Locales whose index labels LocaleUtilsBase adds to its AlphabeticIndex in
    // addition to the primary (and optional secondary) locale.
    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Serbian and Ukrainian labels are complementary supersets of Russian
    public static final Locale LOCALE_SERBIAN = new Locale("sr");
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");
  62
  63     /**
  64      * This class is the default implementation and should be the base class
  65      * for other locales.
  66      *
  67      * sortKey: same as name
  68      * nameLookupKeys: none
  69      * labels: uses ICU AlphabeticIndex for labels and extends by labeling
  70      *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
  71      */
  72     private static class LocaleUtilsBase {
  73         private static final String EMPTY_STRING = "";
  74         private static final String NUMBER_STRING = "#";
  75
  76         protected final ImmutableIndex mAlphabeticIndex;
  77         private final int mAlphabeticIndexBucketCount;
  78         private final int mNumberBucketIndex;
  79         private final boolean mEnableSecondaryLocalePinyin;
  80
  81         public LocaleUtilsBase(LocaleSet locales) {
  82             // AlphabeticIndex.getBucketLabel() uses a binary search across
  83             // the entire label set so care should be taken about growing this
  84             // set too large. The following set determines for which locales
  85             // we will show labels other than your primary locale. General rules
  86             // of thumb for adding a locale: should be a supported locale; and
  87             // should not be included if from a name it is not deterministic
  88             // which way to label it (so eg Chinese cannot be added because
  89             // the labeling of a Chinese character varies between Simplified,
  90             // Traditional, and Japanese locales). Use English only for all
  91             // Latin based alphabets. Ukrainian and Serbian are chosen for
  92             // Cyrillic because their alphabets are complementary supersets
  93             // of Russian.
  94             final Locale secondaryLocale = locales.getSecondaryLocale();
  95             mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese();
  96             AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
  97                 .setMaxLabelCount(300);
  98             if (secondaryLocale != null) {
  99                 ai.addLabels(secondaryLocale);
 100             }
 101             mAlphabeticIndex = ai.addLabels(Locale.ENGLISH)
 102                 .addLabels(Locale.JAPANESE)
 103                 .addLabels(Locale.KOREAN)
 104                 .addLabels(LOCALE_THAI)
 105                 .addLabels(LOCALE_ARABIC)
 106                 .addLabels(LOCALE_HEBREW)
 107                 .addLabels(LOCALE_GREEK)
 108                 .addLabels(LOCALE_UKRAINIAN)
 109                 .addLabels(LOCALE_SERBIAN)
 110                 .buildImmutableIndex();
 111             mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
 112             mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
 113         }
 114
 115         public String getSortKey(String name) {
 116             return name;
 117         }
 118
 119         /**
 120          * Returns the bucket index for the specified string. AlphabeticIndex
 121          * sorts strings into buckets numbered in order from 0 to N, where the
 122          * exact value of N depends on how many representative index labels are
 123          * used in a particular locale. This routine adds one additional bucket
 124          * for phone numbers. It attempts to detect phone numbers and shifts
 125          * the bucket indexes returned by AlphabeticIndex in order to make room
 126          * for the new # bucket, so the returned range becomes 0 to N+1.
 127          */
 128         public int getBucketIndex(String name) {
 129             if (name == null) {
 130                 return -1;
 131             }
 132             boolean prefixIsNumeric = false;
 133             final int length = name.length();
 134             int offset = 0;
 135             while (offset < length) {
 136                 int codePoint = Character.codePointAt(name, offset);
 137                 // Ignore standard phone number separators and identify any
 138                 // string that otherwise starts with a number.
 139                 if (Character.isDigit(codePoint)) {
 140                     prefixIsNumeric = true;
 141                     break;
 142                 } else if (!Character.isSpaceChar(codePoint) &&
 143                            codePoint != '+' && codePoint != '(' &&
 144                            codePoint != ')' && codePoint != '.' &&
 145                            codePoint != '-' && codePoint != '#') {
 146                     break;
 147                 }
 148                 offset += Character.charCount(codePoint);
 149             }
 150             if (prefixIsNumeric) {
 151                 return mNumberBucketIndex;
 152             }
 153
 154             /**
 155              * TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese
 156              * as a secondary locale. Remove the following if that is added.
 157              */
 158             if (mEnableSecondaryLocalePinyin) {
 159                 name = HanziToPinyin.getInstance().transliterate(name);
 160             }
 161             final int bucket = mAlphabeticIndex.getBucketIndex(name);
 162             if (bucket < 0) {
 163                 return -1;
 164             }
 165             if (bucket >= mNumberBucketIndex) {
 166                 return bucket + 1;
 167             }
 168             return bucket;
 169         }
 170
 171         /**
 172          * Returns the number of buckets in use (one more than AlphabeticIndex
 173          * uses, because this class adds a bucket for phone numbers).
 174          */
 175         public int getBucketCount() {
 176             return mAlphabeticIndexBucketCount + 1;
 177         }
 178
 179         /**
 180          * Returns the label for the specified bucket index if a valid index,
 181          * otherwise returns an empty string. '#' is returned for the phone
 182          * number bucket; for all others, the AlphabeticIndex label is returned.
 183          */
 184         public String getBucketLabel(int bucketIndex) {
 185             if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
 186                 return EMPTY_STRING;
 187             } else if (bucketIndex == mNumberBucketIndex) {
 188                 return NUMBER_STRING;
 189             } else if (bucketIndex > mNumberBucketIndex) {
 190                 --bucketIndex;
 191             }
 192             return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
 193         }
 194
 195         @SuppressWarnings("unused")
 196         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
 197             return null;
 198         }
 199
 200         public ArrayList<String> getLabels() {
 201             final int bucketCount = getBucketCount();
 202             final ArrayList<String> labels = new ArrayList<String>(bucketCount);
 203             for(int i = 0; i < bucketCount; ++i) {
 204                 labels.add(getBucketLabel(i));
 205             }
 206             return labels;
 207         }
 208     }
 209
 210     /**
 211      * Japanese specific locale overrides.
 212      *
 213      * sortKey: unchanged (same as name)
 214      * nameLookupKeys: unchanged (none)
 215      * labels: extends default labels by labeling unlabeled CJ characters
 216      *     with the Japanese character 他 ("misc"). Japanese labels are:
 217      *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
 218      */
 219     private static class JapaneseContactUtils extends LocaleUtilsBase {
 220         // \u4ed6 is Japanese character 他 ("misc")
 221         private static final String JAPANESE_MISC_LABEL = "\u4ed6";
 222         private final int mMiscBucketIndex;
 223
 224         public JapaneseContactUtils(LocaleSet locales) {
 225             super(locales);
 226             // Determine which bucket AlphabeticIndex is lumping unclassified
 227             // Japanese characters into by looking up the bucket index for
 228             // a representative Kanji/CJK unified ideograph (\u65e5 is the
 229             // character '日').
 230             mMiscBucketIndex = super.getBucketIndex("\u65e5");
 231         }
 232
 233         // Set of UnicodeBlocks for unified CJK (Chinese) characters and
 234         // Japanese characters. This includes all code blocks that might
 235         // contain a character used in Japanese (which is why unified CJK
 236         // blocks are included but Korean Hangul and jamo are not).
 237         private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
 238         static {
 239             Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
 240             set.add(UnicodeBlock.HIRAGANA);
 241             set.add(UnicodeBlock.KATAKANA);
 242             set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
 243             set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
 244             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
 245             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
 246             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
 247             set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
 248             set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
 249             set.add(UnicodeBlock.CJK_COMPATIBILITY);
 250             set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
 251             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
 252             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
 253             CJ_BLOCKS = Collections.unmodifiableSet(set);
 254         }
 255
 256         /**
 257          * Helper routine to identify unlabeled Chinese or Japanese characters
 258          * to put in a 'misc' bucket.
 259          *
 260          * @return true if the specified Unicode code point is Chinese or
 261          *              Japanese
 262          */
 263         private static boolean isChineseOrJapanese(int codePoint) {
 264             return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
 265         }
 266
 267         /**
 268          * Returns the bucket index for the specified string. Adds an
 269          * additional 'misc' bucket for Kanji characters to the base class set.
 270          */
 271         @Override
 272         public int getBucketIndex(String name) {
 273             final int bucketIndex = super.getBucketIndex(name);
 274             if ((bucketIndex == mMiscBucketIndex &&
 275                  !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
 276                 bucketIndex > mMiscBucketIndex) {
 277                 return bucketIndex + 1;
 278             }
 279             return bucketIndex;
 280         }
 281
 282         /**
 283          * Returns the number of buckets in use (one more than the base class
 284          * uses, because this class adds a bucket for Kanji).
 285          */
 286         @Override
 287         public int getBucketCount() {
 288             return super.getBucketCount() + 1;
 289         }
 290
 291         /**
 292          * Returns the label for the specified bucket index if a valid index,
 293          * otherwise returns an empty string. '他' is returned for unclassified
 294          * Kanji; for all others, the label determined by the base class is
 295          * returned.
 296          */
 297         @Override
 298         public String getBucketLabel(int bucketIndex) {
 299             if (bucketIndex == mMiscBucketIndex) {
 300                 return JAPANESE_MISC_LABEL;
 301             } else if (bucketIndex > mMiscBucketIndex) {
 302                 --bucketIndex;
 303             }
 304             return super.getBucketLabel(bucketIndex);
 305         }
 306
 307         @Override
 308         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
 309             // Hiragana and Katakana will be positively identified as Japanese.
 310             if (nameStyle == PhoneticNameStyle.JAPANESE) {
 311                 return getRomajiNameLookupKeys(name);
 312             }
 313             return null;
 314         }
 315
 316         private static boolean mInitializedTransliterator;
 317         private static Transliterator mJapaneseTransliterator;
 318
 319         private static Transliterator getJapaneseTransliterator() {
 320             synchronized(JapaneseContactUtils.class) {
 321                 if (!mInitializedTransliterator) {
 322                     mInitializedTransliterator = true;
 323                     Transliterator t = null;
 324                     try {
 325                         t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
 326                                 + " Latin-Ascii");
 327                     } catch (RuntimeException e) {
 328                         Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
 329                                 + " is missing");
 330                     }
 331                     mJapaneseTransliterator = t;
 332                 }
 333                 return mJapaneseTransliterator;
 334             }
 335         }
 336
 337         public static Iterator<String> getRomajiNameLookupKeys(String name) {
 338             final Transliterator t = getJapaneseTransliterator();
 339             if (t == null) {
 340                 return null;
 341             }
 342             final String romajiName = t.transliterate(name);
 343             if (TextUtils.isEmpty(romajiName) ||
 344                     TextUtils.equals(name, romajiName)) {
 345                 return null;
 346             }
 347             final HashSet<String> keys = new HashSet<String>();
 348             keys.add(romajiName);
 349             return keys.iterator();
 350         }
 351     }
 352
 353     /**
 354      * Simplified Chinese specific locale overrides. Uses ICU Transliterator
 355      * for generating pinyin transliteration.
 356      *
 357      * sortKey: unchanged (same as name)
 358      * nameLookupKeys: adds additional name lookup keys
 359      *     - Chinese character's pinyin and pinyin's initial character.
 360      *     - Latin word and initial character.
 361      * labels: unchanged
 362      *     Simplified Chinese labels are the same as English: [A-Z], #, " "
 363      */
 364     private static class SimplifiedChineseContactUtils
 365         extends LocaleUtilsBase {
 366         public SimplifiedChineseContactUtils(LocaleSet locales) {
 367             super(locales);
 368         }
 369
 370         @Override
 371         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
 372             if (nameStyle != FullNameStyle.JAPANESE &&
 373                     nameStyle != FullNameStyle.KOREAN) {
 374                 return getPinyinNameLookupKeys(name);
 375             }
 376             return null;
 377         }
 378
 379         public static Iterator<String> getPinyinNameLookupKeys(String name) {
 380             // TODO : Reduce the object allocation.
 381             HashSet<String> keys = new HashSet<String>();
 382             ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
 383             final int tokenCount = tokens.size();
 384             final StringBuilder keyPinyin = new StringBuilder();
 385             final StringBuilder keyInitial = new StringBuilder();
 386             // There is no space among the Chinese Characters, the variant name
 387             // lookup key wouldn't work for Chinese. The keyOriginal is used to
 388             // build the lookup keys for itself.
 389             final StringBuilder keyOriginal = new StringBuilder();
 390             for (int i = tokenCount - 1; i >= 0; i--) {
 391                 final Token token = tokens.get(i);
 392                 if (Token.UNKNOWN == token.type) {
 393                     continue;
 394                 }
 395                 if (Token.PINYIN == token.type) {
 396                     keyPinyin.insert(0, token.target);
 397                     keyInitial.insert(0, token.target.charAt(0));
 398                 } else if (Token.LATIN == token.type) {
 399                     // Avoid adding space at the end of String.
 400                     if (keyPinyin.length() > 0) {
 401                         keyPinyin.insert(0, ' ');
 402                     }
 403                     if (keyOriginal.length() > 0) {
 404                         keyOriginal.insert(0, ' ');
 405                     }
 406                     keyPinyin.insert(0, token.source);
 407                     keyInitial.insert(0, token.source.charAt(0));
 408                 }
 409                 keyOriginal.insert(0, token.source);
 410                 keys.add(keyOriginal.toString());
 411                 keys.add(keyPinyin.toString());
 412                 keys.add(keyInitial.toString());
 413             }
 414             return keys.iterator();
 415         }
 416     }
 417
 418     private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
 419     private static LocaleUtils sSingleton;
 420
 421     private final LocaleSet mLocales;
 422     private final LocaleUtilsBase mUtils;
 423
 424     private LocaleUtils(LocaleSet locales) {
 425         if (locales == null) {
 426             mLocales = LocaleSet.getDefault();
 427         } else {
 428             mLocales = locales;
 429         }
 430         if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
 431             mUtils = new JapaneseContactUtils(mLocales);
 432         } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
 433             mUtils = new SimplifiedChineseContactUtils(mLocales);
 434         } else {
 435             mUtils = new LocaleUtilsBase(mLocales);
 436         }
 437         Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
 438                 + getLabels().toString());
 439     }
 440
 441     public boolean isLocale(LocaleSet locales) {
 442         return mLocales.equals(locales);
 443     }
 444
 445     public static synchronized LocaleUtils getInstance() {
 446         if (sSingleton == null) {
 447             sSingleton = new LocaleUtils(LocaleSet.getDefault());
 448         }
 449         return sSingleton;
 450     }
 451
 452     @VisibleForTesting
 453     public static synchronized void setLocale(Locale locale) {
 454         setLocales(new LocaleSet(locale));
 455     }
 456
 457     public static synchronized void setLocales(LocaleSet locales) {
 458         if (sSingleton == null || !sSingleton.isLocale(locales)) {
 459             sSingleton = new LocaleUtils(locales);
 460         }
 461     }
 462
 463     public String getSortKey(String name, int nameStyle) {
 464         return mUtils.getSortKey(name);
 465     }
 466
 467     public int getBucketIndex(String name) {
 468         return mUtils.getBucketIndex(name);
 469     }
 470
 471     public int getBucketCount() {
 472         return mUtils.getBucketCount();
 473     }
 474
 475     public String getBucketLabel(int bucketIndex) {
 476         return mUtils.getBucketLabel(bucketIndex);
 477     }
 478
 479     public String getLabel(String name) {
 480         return getBucketLabel(getBucketIndex(name));
 481     }
 482
 483     public ArrayList<String> getLabels() {
 484         return mUtils.getLabels();
 485     }
 486 }