2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 import com.ibm.icu4jni.util.ICU;
21 import java.util.Comparator;
22 import java.util.Locale;
25 * Performs locale-sensitive string comparison. A concrete subclass,
26 * {@link RuleBasedCollator}, allows customization of the collation ordering by
27 * the use of rule sets.
29 * Following the <a href="http://www.unicode.org">Unicode Consortium</a>'s
30 * specifications for the <a
31 * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation
32 * Algorithm (UCA)</a>, there are 4 different levels of strength used in comparisons:
35 * <li>PRIMARY strength: Typically, this is used to denote differences between
36 * base characters (for example, "a" &lt; "b"). It is the strongest difference.
37 * For example, dictionaries are divided into different sections by base character.
39 * <li>SECONDARY strength: Accents in the characters are considered secondary
40 * differences (for example, "as" &lt; "às" &lt; "at"). Other differences
41 * between letters can also be considered secondary differences, depending on
42 * the language. A secondary difference is ignored when there is a primary
43 * difference anywhere in the strings.
44 * <li>TERTIARY strength: Upper and lower case differences in characters are
45 * distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
46 * "aò"). In addition, a variant of a letter differs from the base form
47 * on the tertiary strength (such as "A" and "Ⓐ"). Another example is the
48 * difference between large and small Kana. A tertiary difference is ignored
49 * when there is a primary or secondary difference anywhere in the strings.
50 * <li>IDENTICAL strength: When all other strengths are equal, the IDENTICAL
51 * strength is used as a tiebreaker. The Unicode code point values of the NFD
52 * form of each string are compared, just in case there is no difference. For
53 * example, Hebrew cantillation marks are only distinguished at this strength.
54 * This strength should be used sparingly, because strings that differ only in
55 * their code point values are extremely rare. Using this strength
56 * substantially decreases the performance for both comparison and collation key
57 * generation APIs. This strength also increases the size of the collation key.
60 * This {@code Collator} deals only with two decomposition modes, the canonical
61 * decomposition mode and one that does not use any decomposition. The
62 * compatibility decomposition mode
63 * {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the
64 * canonical decomposition mode is set, {@code Collator} handles un-normalized
65 * text properly, producing the same results as if the text were normalized in
66 * NFD. If canonical decomposition is turned off, it is the user's
67 * responsibility to ensure that all text is already in the appropriate form
68 * before performing a comparison or before getting a {@link CollationKey}.
74 * // Get the Collator for US English and set its strength to PRIMARY
75 * Collator usCollator = Collator.getInstance(Locale.US);
76 * usCollator.setStrength(Collator.PRIMARY);
77 * if (usCollator.compare("abc", "ABC") == 0) {
78 * System.out.println("Strings are equivalent");
84 * The following example shows how to compare two strings using the collator for the default locale:
89 * // Compare two strings in the default locale
90 * Collator myCollator = Collator.getInstance();
91 * myCollator.setDecomposition(Collator.NO_DECOMPOSITION);
92 * if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) {
93 * System.out.println("\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition");
94 * myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
95 * if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) {
96 * System.out.println("Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition");
98 * System.out.println("\u00e0\u0325 is equal to a\u0325\u0300 with decomposition");
101 * System.out.println("Error: \u00e0\u0325 should be not equal to a\u0325\u0300 without decomposition");
107 * @see RuleBasedCollator
110 public abstract class Collator implements Comparator<Object>, Cloneable {
112 * Constant used to specify the decomposition rule.
114 public static final int NO_DECOMPOSITION = 0;
117 * Constant used to specify the decomposition rule.
119 public static final int CANONICAL_DECOMPOSITION = 1;
122 * Constant used to specify the decomposition rule. This value for
123 * decomposition is not supported.
125 public static final int FULL_DECOMPOSITION = 2;
128 * Constant used to specify the collation strength.
130 public static final int PRIMARY = 0;
133 * Constant used to specify the collation strength.
135 public static final int SECONDARY = 1;
138 * Constant used to specify the collation strength.
140 public static final int TERTIARY = 2;
143 * Constant used to specify the collation strength.
145 public static final int IDENTICAL = 3;
147 // Wrapper class of ICU4JNI Collator
148 com.ibm.icu4jni.text.Collator icuColl;
/**
 * Creates a collator that wraps the given ICU4JNI collator.
 *
 * @param wrapper
 *            the ICU4JNI collator to store in {@code icuColl}.
 */
Collator(com.ibm.icu4jni.text.Collator wrapper) {
    icuColl = wrapper;
}
155 * Constructs a new {@code Collator} instance.
157 protected Collator() {
159 // BEGIN android-added
160 icuColl = com.ibm.icu4jni.text.Collator.getInstance(Locale.getDefault());
165 * Returns a new collator with the same decomposition mode and
166 * strength value as this collator.
168 * @return a shallow copy of this collator.
169 * @see java.lang.Cloneable
172 public Object clone() {
174 Collator clone = (Collator) super.clone();
175 clone.icuColl = (com.ibm.icu4jni.text.Collator) this.icuColl.clone();
177 } catch (CloneNotSupportedException e) {
178 throw new AssertionError(e); // android-changed
183 * Compares two objects to determine their relative order. The objects must
187 * the first string to compare.
189 * the second string to compare.
190 * @return a negative value if {@code object1} is less than {@code object2},
191 * 0 if they are equal, and a positive value if {@code object1} is
192 * greater than {@code object2}.
193 * @throws ClassCastException
194 * if {@code object1} or {@code object2} is not a {@code String}.
196 public int compare(Object object1, Object object2) {
197 return compare((String) object1, (String) object2);
201 * Compares two strings to determine their relative order.
204 * the first string to compare.
206 * the second string to compare.
207 * @return a negative value if {@code string1} is less than {@code string2},
208 * 0 if they are equal and a positive value if {@code string1} is
209 * greater than {@code string2}.
211 public abstract int compare(String string1, String string2);
214 * Compares this collator with the specified object and indicates if they
218 * the object to compare with this object.
219 * @return {@code true} if {@code object} is a {@code Collator} object and
220 * it has the same strength and decomposition values as this
221 * collator; {@code false} otherwise.
225 public boolean equals(Object object) {
226 if (!(object instanceof Collator)) {
229 Collator collator = (Collator) object;
230 return this.icuColl == null ? collator.icuColl == null : this.icuColl
231 .equals(collator.icuColl);
235 * Compares two strings using the collation rules to determine if they are
239 * the first string to compare.
241 * the second string to compare.
242 * @return {@code true} if {@code string1} and {@code string2} are equal
243 * using the collation rules, false otherwise.
245 public boolean equals(String string1, String string2) {
246 return compare(string1, string2) == 0;
250 * Returns an array of locales for which custom {@code Collator} instances
252 * <p>Note that Android does not support user-supplied locale service providers.
254 public static Locale[] getAvailableLocales() {
255 return ICU.getAvailableCollatorLocales();
259 * Returns a {@link CollationKey} for the specified string for this collator
260 * with the current decomposition rule and strength value.
263 * the source string that is converted into a collation key.
264 * @return the collation key for {@code string}.
266 public abstract CollationKey getCollationKey(String string);
269 * Returns the decomposition rule for this collator.
271 * @return the decomposition rule, either {@code NO_DECOMPOSITION} or
272 * {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} is
275 public int getDecomposition() {
276 return decompositionMode_ICU_Java(this.icuColl.getDecomposition());
280 * Returns a {@code Collator} instance which is appropriate for the user's default
282 * See "<a href="../util/Locale.html#default_locale">Be wary of the default locale</a>".
284 public static Collator getInstance() {
285 return getInstance(Locale.getDefault());
289 * Returns a {@code Collator} instance which is appropriate for {@code locale}.
291 public static Collator getInstance(Locale locale) {
292 if (locale == null) {
293 throw new NullPointerException();
295 return new RuleBasedCollator(com.ibm.icu4jni.text.Collator.getInstance(locale));
299 * Returns the strength value for this collator.
301 * @return the strength value, either PRIMARY, SECONDARY, TERTIARY or
304 public int getStrength() {
305 return strength_ICU_Java(this.icuColl.getStrength());
309 public abstract int hashCode();
312 * Sets the decomposition rule for this collator.
315 * the decomposition rule, either {@code NO_DECOMPOSITION} or
316 * {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION}
318 * @throws IllegalArgumentException
319 * if the provided decomposition rule is not valid. This includes
320 * {@code FULL_DECOMPOSITION}.
322 public void setDecomposition(int value) {
323 this.icuColl.setDecomposition(decompositionMode_Java_ICU(value));
327 * Sets the strength value for this collator.
330 * the strength value, either PRIMARY, SECONDARY, TERTIARY, or
332 * @throws IllegalArgumentException
333 * if the provided strength value is not valid.
335 public void setStrength(int value) {
336 this.icuColl.setStrength(strength_Java_ICU(value));
339 private int decompositionMode_Java_ICU(int mode) {
341 case Collator.CANONICAL_DECOMPOSITION:
342 return com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION;
343 case Collator.NO_DECOMPOSITION:
344 return com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION;
346 throw new IllegalArgumentException();
349 private int decompositionMode_ICU_Java(int mode) {
352 case com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION:
353 javaMode = Collator.NO_DECOMPOSITION;
355 case com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION:
356 javaMode = Collator.CANONICAL_DECOMPOSITION;
362 private int strength_Java_ICU(int value) {
364 case Collator.PRIMARY:
365 return com.ibm.icu4jni.text.Collator.PRIMARY;
366 case Collator.SECONDARY:
367 return com.ibm.icu4jni.text.Collator.SECONDARY;
368 case Collator.TERTIARY:
369 return com.ibm.icu4jni.text.Collator.TERTIARY;
370 case Collator.IDENTICAL:
371 return com.ibm.icu4jni.text.Collator.IDENTICAL;
373 throw new IllegalArgumentException();
376 private int strength_ICU_Java(int value) {
377 int javaValue = value;
379 case com.ibm.icu4jni.text.Collator.PRIMARY:
380 javaValue = Collator.PRIMARY;
382 case com.ibm.icu4jni.text.Collator.SECONDARY:
383 javaValue = Collator.SECONDARY;
385 case com.ibm.icu4jni.text.Collator.TERTIARY:
386 javaValue = Collator.TERTIARY;
388 case com.ibm.icu4jni.text.Collator.IDENTICAL:
389 javaValue = Collator.IDENTICAL;