OSDN Git Service

Get rid of icu4j dependency
author Alexander Martinz <amartinz@shiftphones.com>
Tue, 26 Feb 2019 17:31:06 +0000 (18:31 +0100)
committer Michael Bestas <mkbestas@lineageos.org>
Tue, 19 Mar 2019 20:56:33 +0000 (22:56 +0200)
Change-Id: I0eb134110fc6116a89600b6d343d775da2f13530
Signed-off-by: Alexander Martinz <amartinz@shiftphones.com>
src/org/lineageos/eleven/locale/HanziToPinyin.java [deleted file]
src/org/lineageos/eleven/locale/LocaleUtils.java

diff --git a/src/org/lineageos/eleven/locale/HanziToPinyin.java b/src/org/lineageos/eleven/locale/HanziToPinyin.java
deleted file mode 100644 (file)
index e0ed0ab..0000000
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.lineageos.eleven.locale;
-
-import android.text.TextUtils;
-import android.util.Log;
-
-import java.util.ArrayList;
-
-import android.icu.text.Transliterator;
-
-/**
- * An object to convert Chinese character to its corresponding pinyin string.
- * For characters with multiple possible pinyin string, only one is selected
- * according to ICU Transliterator class. Polyphone is not supported in this
- * implementation.
- */
-public class HanziToPinyin {
-    private static final String TAG = "HanziToPinyin";
-
-    private static HanziToPinyin sInstance;
-    private Transliterator mPinyinTransliterator;
-    private Transliterator mAsciiTransliterator;
-
-    public static class Token {
-        /**
-         * Separator between target string for each source char
-         */
-        public static final String SEPARATOR = " ";
-
-        public static final int LATIN = 1;
-        public static final int PINYIN = 2;
-        public static final int UNKNOWN = 3;
-
-        public Token() {
-        }
-
-        public Token(int type, String source, String target) {
-            this.type = type;
-            this.source = source;
-            this.target = target;
-        }
-
-        /**
-         * Type of this token, ASCII, PINYIN or UNKNOWN.
-         */
-        public int type;
-        /**
-         * Original string before translation.
-         */
-        public String source;
-        /**
-         * Translated string of source. For Han, target is corresponding Pinyin. Otherwise target is
-         * original string in source.
-         */
-        public String target;
-    }
-
-    private HanziToPinyin() {
-        try {
-            mPinyinTransliterator = Transliterator.getInstance("Han-Latin/Names; Latin-Ascii; Any-Upper");
-            mAsciiTransliterator = Transliterator.getInstance("Latin-Ascii");
-        } catch (RuntimeException e) {
-            Log.w(TAG, "Han-Latin/Names transliterator data is missing,"
-                    + " HanziToPinyin is disabled");
-        }
-    }
-
-    public boolean hasChineseTransliterator() {
-        return mPinyinTransliterator != null;
-    }
-
-    public static HanziToPinyin getInstance() {
-        synchronized (HanziToPinyin.class) {
-            if (sInstance == null) {
-                sInstance = new HanziToPinyin();
-            }
-            return sInstance;
-        }
-    }
-
-    private void tokenize(char character, Token token) {
-        token.source = Character.toString(character);
-
-        // ASCII
-        if (character < 128) {
-            token.type = Token.LATIN;
-            token.target = token.source;
-            return;
-        }
-
-        // Extended Latin. Transcode these to ASCII equivalents
-        if (character < 0x250 || (0x1e00 <= character && character < 0x1eff)) {
-            token.type = Token.LATIN;
-            token.target = mAsciiTransliterator == null ? token.source :
-                    mAsciiTransliterator.transliterate(token.source);
-            return;
-        }
-
-        token.type = Token.PINYIN;
-        token.target = mPinyinTransliterator.transliterate(token.source);
-        if (TextUtils.isEmpty(token.target) ||
-                TextUtils.equals(token.source, token.target)) {
-            token.type = Token.UNKNOWN;
-            token.target = token.source;
-        }
-    }
-
-    public String transliterate(final String input) {
-        if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
-            return null;
-        }
-        return mPinyinTransliterator.transliterate(input);
-    }
-
-    /**
-     * Convert the input to a array of tokens. The sequence of ASCII or Unknown characters without
-     * space will be put into a Token, One Hanzi character which has pinyin will be treated as a
-     * Token. If there is no Chinese transliterator, the empty token array is returned.
-     */
-    public ArrayList<Token> getTokens(final String input) {
-        ArrayList<Token> tokens = new ArrayList<Token>();
-        if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
-            // return empty tokens.
-            return tokens;
-        }
-
-        final int inputLength = input.length();
-        final StringBuilder sb = new StringBuilder();
-        int tokenType = Token.LATIN;
-        Token token = new Token();
-
-        // Go through the input, create a new token when
-        // a. Token type changed
-        // b. Get the Pinyin of current charater.
-        // c. current character is space.
-        for (int i = 0; i < inputLength; i++) {
-            final char character = input.charAt(i);
-            if (Character.isSpaceChar(character)) {
-                if (sb.length() > 0) {
-                    addToken(sb, tokens, tokenType);
-                }
-            } else {
-                tokenize(character, token);
-                if (token.type == Token.PINYIN) {
-                    if (sb.length() > 0) {
-                        addToken(sb, tokens, tokenType);
-                    }
-                    tokens.add(token);
-                    token = new Token();
-                } else {
-                    if (tokenType != token.type && sb.length() > 0) {
-                        addToken(sb, tokens, tokenType);
-                    }
-                    sb.append(token.target);
-                }
-                tokenType = token.type;
-            }
-        }
-        if (sb.length() > 0) {
-            addToken(sb, tokens, tokenType);
-        }
-        return tokens;
-    }
-
-    private void addToken(
-            final StringBuilder sb, final ArrayList<Token> tokens, final int tokenType) {
-        String str = sb.toString();
-        tokens.add(new Token(tokenType, str, str));
-        sb.setLength(0);
-    }
-}
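diff --git a/src/org/lineageos/eleven/locale/LocaleUtils.java b/src/org/lineageos/eleven/locale/LocaleUtils.java
index 8f91639..2f27372 100644 (file)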
 
 package org.lineageos.eleven.locale;
 
-import android.provider.ContactsContract.FullNameStyle;
-import android.provider.ContactsContract.PhoneticNameStyle;
+import android.icu.text.AlphabeticIndex;
 import android.support.annotation.VisibleForTesting;
-import android.text.TextUtils;
 import android.util.Log;
 
-import org.lineageos.eleven.locale.HanziToPinyin.Token;
-
-import java.lang.Character.UnicodeBlock;
 import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Locale;
-import java.util.Set;
-
-import android.icu.text.AlphabeticIndex;
-import android.icu.text.AlphabeticIndex.ImmutableIndex;
-import android.icu.text.Transliterator;
 
 /**
  * This utility class provides specialized handling for locale specific
@@ -73,10 +61,9 @@ public class LocaleUtils {
         private static final String EMPTY_STRING = "";
         private static final String NUMBER_STRING = "#";
 
-        protected final ImmutableIndex mAlphabeticIndex;
+        protected final AlphabeticIndex.ImmutableIndex mAlphabeticIndex;
         private final int mAlphabeticIndexBucketCount;
         private final int mNumberBucketIndex;
-        private final boolean mEnableSecondaryLocalePinyin;
 
         public LocaleUtilsBase(LocaleSet locales) {
             // AlphabeticIndex.getBucketLabel() uses a binary search across
@@ -92,7 +79,6 @@ public class LocaleUtils {
             // Cyrillic because their alphabets are complementary supersets
             // of Russian.
             final Locale secondaryLocale = locales.getSecondaryLocale();
-            mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese();
             AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
                 .setMaxLabelCount(300);
             if (secondaryLocale != null) {
@@ -151,13 +137,6 @@ public class LocaleUtils {
                 return mNumberBucketIndex;
             }
 
-            /**
-             * TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese
-             * as a secondary locale. Remove the following if that is added.
-             */
-            if (mEnableSecondaryLocalePinyin) {
-                name = HanziToPinyin.getInstance().transliterate(name);
-            }
             final int bucket = mAlphabeticIndex.getBucketIndex(name);
             if (bucket < 0) {
                 return -1;
@@ -207,215 +186,6 @@ public class LocaleUtils {
         }
     }
 
-    /**
-     * Japanese specific locale overrides.
-     *
-     * sortKey: unchanged (same as name)
-     * nameLookupKeys: unchanged (none)
-     * labels: extends default labels by labeling unlabeled CJ characters
-     *     with the Japanese character 他 ("misc"). Japanese labels are:
-     *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
-     */
-    private static class JapaneseContactUtils extends LocaleUtilsBase {
-        // \u4ed6 is Japanese character 他 ("misc")
-        private static final String JAPANESE_MISC_LABEL = "\u4ed6";
-        private final int mMiscBucketIndex;
-
-        public JapaneseContactUtils(LocaleSet locales) {
-            super(locales);
-            // Determine which bucket AlphabeticIndex is lumping unclassified
-            // Japanese characters into by looking up the bucket index for
-            // a representative Kanji/CJK unified ideograph (\u65e5 is the
-            // character '日').
-            mMiscBucketIndex = super.getBucketIndex("\u65e5");
-        }
-
-        // Set of UnicodeBlocks for unified CJK (Chinese) characters and
-        // Japanese characters. This includes all code blocks that might
-        // contain a character used in Japanese (which is why unified CJK
-        // blocks are included but Korean Hangul and jamo are not).
-        private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
-        static {
-            Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
-            set.add(UnicodeBlock.HIRAGANA);
-            set.add(UnicodeBlock.KATAKANA);
-            set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
-            set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
-            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
-            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
-            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
-            set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
-            set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
-            set.add(UnicodeBlock.CJK_COMPATIBILITY);
-            set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
-            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
-            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
-            CJ_BLOCKS = Collections.unmodifiableSet(set);
-        }
-
-        /**
-         * Helper routine to identify unlabeled Chinese or Japanese characters
-         * to put in a 'misc' bucket.
-         *
-         * @return true if the specified Unicode code point is Chinese or
-         *              Japanese
-         */
-        private static boolean isChineseOrJapanese(int codePoint) {
-            return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
-        }
-
-        /**
-         * Returns the bucket index for the specified string. Adds an
-         * additional 'misc' bucket for Kanji characters to the base class set.
-         */
-        @Override
-        public int getBucketIndex(String name) {
-            final int bucketIndex = super.getBucketIndex(name);
-            if ((bucketIndex == mMiscBucketIndex &&
-                 !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
-                bucketIndex > mMiscBucketIndex) {
-                return bucketIndex + 1;
-            }
-            return bucketIndex;
-        }
-
-        /**
-         * Returns the number of buckets in use (one more than the base class
-         * uses, because this class adds a bucket for Kanji).
-         */
-        @Override
-        public int getBucketCount() {
-            return super.getBucketCount() + 1;
-        }
-
-        /**
-         * Returns the label for the specified bucket index if a valid index,
-         * otherwise returns an empty string. '他' is returned for unclassified
-         * Kanji; for all others, the label determined by the base class is
-         * returned.
-         */
-        @Override
-        public String getBucketLabel(int bucketIndex) {
-            if (bucketIndex == mMiscBucketIndex) {
-                return JAPANESE_MISC_LABEL;
-            } else if (bucketIndex > mMiscBucketIndex) {
-                --bucketIndex;
-            }
-            return super.getBucketLabel(bucketIndex);
-        }
-
-        @Override
-        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
-            // Hiragana and Katakana will be positively identified as Japanese.
-            if (nameStyle == PhoneticNameStyle.JAPANESE) {
-                return getRomajiNameLookupKeys(name);
-            }
-            return null;
-        }
-
-        private static boolean mInitializedTransliterator;
-        private static Transliterator mJapaneseTransliterator;
-
-        private static Transliterator getJapaneseTransliterator() {
-            synchronized(JapaneseContactUtils.class) {
-                if (!mInitializedTransliterator) {
-                    mInitializedTransliterator = true;
-                    Transliterator t = null;
-                    try {
-                        t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
-                                + " Latin-Ascii");
-                    } catch (RuntimeException e) {
-                        Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
-                                + " is missing");
-                    }
-                    mJapaneseTransliterator = t;
-                }
-                return mJapaneseTransliterator;
-            }
-        }
-
-        public static Iterator<String> getRomajiNameLookupKeys(String name) {
-            final Transliterator t = getJapaneseTransliterator();
-            if (t == null) {
-                return null;
-            }
-            final String romajiName = t.transliterate(name);
-            if (TextUtils.isEmpty(romajiName) ||
-                    TextUtils.equals(name, romajiName)) {
-                return null;
-            }
-            final HashSet<String> keys = new HashSet<String>();
-            keys.add(romajiName);
-            return keys.iterator();
-        }
-    }
-
-    /**
-     * Simplified Chinese specific locale overrides. Uses ICU Transliterator
-     * for generating pinyin transliteration.
-     *
-     * sortKey: unchanged (same as name)
-     * nameLookupKeys: adds additional name lookup keys
-     *     - Chinese character's pinyin and pinyin's initial character.
-     *     - Latin word and initial character.
-     * labels: unchanged
-     *     Simplified Chinese labels are the same as English: [A-Z], #, " "
-     */
-    private static class SimplifiedChineseContactUtils
-        extends LocaleUtilsBase {
-        public SimplifiedChineseContactUtils(LocaleSet locales) {
-            super(locales);
-        }
-
-        @Override
-        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
-            if (nameStyle != FullNameStyle.JAPANESE &&
-                    nameStyle != FullNameStyle.KOREAN) {
-                return getPinyinNameLookupKeys(name);
-            }
-            return null;
-        }
-
-        public static Iterator<String> getPinyinNameLookupKeys(String name) {
-            // TODO : Reduce the object allocation.
-            HashSet<String> keys = new HashSet<String>();
-            ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
-            final int tokenCount = tokens.size();
-            final StringBuilder keyPinyin = new StringBuilder();
-            final StringBuilder keyInitial = new StringBuilder();
-            // There is no space among the Chinese Characters, the variant name
-            // lookup key wouldn't work for Chinese. The keyOriginal is used to
-            // build the lookup keys for itself.
-            final StringBuilder keyOriginal = new StringBuilder();
-            for (int i = tokenCount - 1; i >= 0; i--) {
-                final Token token = tokens.get(i);
-                if (Token.UNKNOWN == token.type) {
-                    continue;
-                }
-                if (Token.PINYIN == token.type) {
-                    keyPinyin.insert(0, token.target);
-                    keyInitial.insert(0, token.target.charAt(0));
-                } else if (Token.LATIN == token.type) {
-                    // Avoid adding space at the end of String.
-                    if (keyPinyin.length() > 0) {
-                        keyPinyin.insert(0, ' ');
-                    }
-                    if (keyOriginal.length() > 0) {
-                        keyOriginal.insert(0, ' ');
-                    }
-                    keyPinyin.insert(0, token.source);
-                    keyInitial.insert(0, token.source.charAt(0));
-                }
-                keyOriginal.insert(0, token.source);
-                keys.add(keyOriginal.toString());
-                keys.add(keyPinyin.toString());
-                keys.add(keyInitial.toString());
-            }
-            return keys.iterator();
-        }
-    }
-
-    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
     private static LocaleUtils sSingleton;
 
     private final LocaleSet mLocales;
@@ -427,13 +197,8 @@ public class LocaleUtils {
         } else {
             mLocales = locales;
         }
-        if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
-            mUtils = new JapaneseContactUtils(mLocales);
-        } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
-            mUtils = new SimplifiedChineseContactUtils(mLocales);
-        } else {
-            mUtils = new LocaleUtilsBase(mLocales);
-        }
+
+        mUtils = new LocaleUtilsBase(mLocales);
         Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
                 + getLabels().toString());
     }