Collator.java revision 12745:f068a4ffddd2
1/*
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27 * (C) Copyright Taligent, Inc. 1996-1998 -  All Rights Reserved
28 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29 *
30 *   The original version of this source code and documentation is copyrighted
31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32 * materials are provided under terms of a License Agreement between Taligent
33 * and Sun. This technology is protected by multiple US and International
34 * patents. This notice and attribution to Taligent may not be removed.
35 *   Taligent is a registered trademark of Taligent, Inc.
36 *
37 */
38
39package java.text;
40
41import java.lang.ref.SoftReference;
42import java.text.spi.CollatorProvider;
43import java.util.Locale;
44import java.util.ResourceBundle;
45import java.util.concurrent.ConcurrentHashMap;
46import java.util.concurrent.ConcurrentMap;
47import sun.util.locale.provider.LocaleProviderAdapter;
48import sun.util.locale.provider.LocaleServiceProviderPool;
49
50
51/**
52 * The <code>Collator</code> class performs locale-sensitive
53 * <code>String</code> comparison. You use this class to build
54 * searching and sorting routines for natural language text.
55 *
56 * <p>
57 * <code>Collator</code> is an abstract base class. Subclasses
58 * implement specific collation strategies. One subclass,
59 * <code>RuleBasedCollator</code>, is currently provided with
60 * the Java Platform and is applicable to a wide set of languages. Other
61 * subclasses may be created to handle more specialized needs.
62 *
63 * <p>
64 * Like other locale-sensitive classes, you can use the static
65 * factory method, <code>getInstance</code>, to obtain the appropriate
66 * <code>Collator</code> object for a given locale. You will only need
67 * to look at the subclasses of <code>Collator</code> if you need
68 * to understand the details of a particular collation strategy or
69 * if you need to modify that strategy.
70 *
71 * <p>
72 * The following example shows how to compare two strings using
73 * the <code>Collator</code> for the default locale.
74 * <blockquote>
75 * <pre>{@code
76 * // Compare two strings in the default locale
77 * Collator myCollator = Collator.getInstance();
78 * if( myCollator.compare("abc", "ABC") < 0 )
79 *     System.out.println("abc is less than ABC");
80 * else
81 *     System.out.println("abc is greater than or equal to ABC");
82 * }</pre>
83 * </blockquote>
84 *
85 * <p>
86 * You can set a <code>Collator</code>'s <em>strength</em> property
87 * to determine the level of difference considered significant in
88 * comparisons. Four strengths are provided: <code>PRIMARY</code>,
89 * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
90 * The exact assignment of strengths to language features is
91 * locale dependant.  For example, in Czech, "e" and "f" are considered
92 * primary differences, while "e" and "&#283;" are secondary differences,
93 * "e" and "E" are tertiary differences and "e" and "e" are identical.
94 * The following shows how both case and accents could be ignored for
95 * US English.
96 * <blockquote>
97 * <pre>
98 * //Get the Collator for US English and set its strength to PRIMARY
99 * Collator usCollator = Collator.getInstance(Locale.US);
100 * usCollator.setStrength(Collator.PRIMARY);
101 * if( usCollator.compare("abc", "ABC") == 0 ) {
102 *     System.out.println("Strings are equivalent");
103 * }
104 * </pre>
105 * </blockquote>
106 * <p>
107 * For comparing <code>String</code>s exactly once, the <code>compare</code>
108 * method provides the best performance. When sorting a list of
109 * <code>String</code>s however, it is generally necessary to compare each
110 * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
111 * provide better performance. The <code>CollationKey</code> class converts
112 * a <code>String</code> to a series of bits that can be compared bitwise
113 * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
114 * created by a <code>Collator</code> object for a given <code>String</code>.
115 * <br>
116 * <strong>Note:</strong> <code>CollationKey</code>s from different
117 * <code>Collator</code>s can not be compared. See the class description
118 * for {@link CollationKey}
119 * for an example using <code>CollationKey</code>s.
120 *
121 * @see         RuleBasedCollator
122 * @see         CollationKey
123 * @see         CollationElementIterator
124 * @see         Locale
125 * @author      Helena Shih, Laura Werner, Richard Gillam
126 */
127
128public abstract class Collator
129    implements java.util.Comparator<Object>, Cloneable
130{
131    /**
132     * Collator strength value.  When set, only PRIMARY differences are
133     * considered significant during comparison. The assignment of strengths
134     * to language features is locale dependant. A common example is for
135     * different base letters ("a" vs "b") to be considered a PRIMARY difference.
136     * @see java.text.Collator#setStrength
137     * @see java.text.Collator#getStrength
138     */
139    public static final int PRIMARY = 0;
140    /**
141     * Collator strength value.  When set, only SECONDARY and above differences are
142     * considered significant during comparison. The assignment of strengths
143     * to language features is locale dependant. A common example is for
144     * different accented forms of the same base letter ("a" vs "\u00E4") to be
145     * considered a SECONDARY difference.
146     * @see java.text.Collator#setStrength
147     * @see java.text.Collator#getStrength
148     */
149    public static final int SECONDARY = 1;
150    /**
151     * Collator strength value.  When set, only TERTIARY and above differences are
152     * considered significant during comparison. The assignment of strengths
153     * to language features is locale dependant. A common example is for
154     * case differences ("a" vs "A") to be considered a TERTIARY difference.
155     * @see java.text.Collator#setStrength
156     * @see java.text.Collator#getStrength
157     */
158    public static final int TERTIARY = 2;
159
160    /**
161     * Collator strength value.  When set, all differences are
162     * considered significant during comparison. The assignment of strengths
163     * to language features is locale dependant. A common example is for control
164     * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at the
165     * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
166     * level.  Additionally, differences between pre-composed accents such as
167     * "&#092;u00C0" (A-grave) and combining accents such as "A&#092;u0300"
168     * (A, combining-grave) will be considered significant at the IDENTICAL
169     * level if decomposition is set to NO_DECOMPOSITION.
170     */
171    public static final int IDENTICAL = 3;
172
173    /**
174     * Decomposition mode value. With NO_DECOMPOSITION
175     * set, accented characters will not be decomposed for collation. This
176     * is the default setting and provides the fastest collation but
177     * will only produce correct results for languages that do not use accents.
178     * @see java.text.Collator#getDecomposition
179     * @see java.text.Collator#setDecomposition
180     */
181    public static final int NO_DECOMPOSITION = 0;
182
183    /**
184     * Decomposition mode value. With CANONICAL_DECOMPOSITION
185     * set, characters that are canonical variants according to Unicode
186     * standard will be decomposed for collation. This should be used to get
187     * correct collation of accented characters.
188     * <p>
189     * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
190     * described in
191     * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
192     * Technical Report #15</a>.
193     * @see java.text.Collator#getDecomposition
194     * @see java.text.Collator#setDecomposition
195     */
196    public static final int CANONICAL_DECOMPOSITION = 1;
197
198    /**
199     * Decomposition mode value. With FULL_DECOMPOSITION
200     * set, both Unicode canonical variants and Unicode compatibility variants
201     * will be decomposed for collation.  This causes not only accented
202     * characters to be collated, but also characters that have special formats
203     * to be collated with their norminal form. For example, the half-width and
204     * full-width ASCII and Katakana characters are then collated together.
205     * FULL_DECOMPOSITION is the most complete and therefore the slowest
206     * decomposition mode.
207     * <p>
208     * FULL_DECOMPOSITION corresponds to Normalization Form KD as
209     * described in
210     * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
211     * Technical Report #15</a>.
212     * @see java.text.Collator#getDecomposition
213     * @see java.text.Collator#setDecomposition
214     */
215    public static final int FULL_DECOMPOSITION = 2;
216
217    /**
218     * Gets the Collator for the current default locale.
219     * The default locale is determined by java.util.Locale.getDefault.
220     * @return the Collator for the default locale.(for example, en_US)
221     * @see java.util.Locale#getDefault
222     */
223    public static synchronized Collator getInstance() {
224        return getInstance(Locale.getDefault());
225    }
226
227    /**
228     * Gets the Collator for the desired locale.
229     * @param desiredLocale the desired locale.
230     * @return the Collator for the desired locale.
231     * @see java.util.Locale
232     * @see java.util.ResourceBundle
233     */
234    public static Collator getInstance(Locale desiredLocale) {
235        SoftReference<Collator> ref = cache.get(desiredLocale);
236        Collator result = (ref != null) ? ref.get() : null;
237        if (result == null) {
238            LocaleProviderAdapter adapter;
239            adapter = LocaleProviderAdapter.getAdapter(CollatorProvider.class,
240                                                       desiredLocale);
241            CollatorProvider provider = adapter.getCollatorProvider();
242            result = provider.getInstance(desiredLocale);
243            if (result == null) {
244                result = LocaleProviderAdapter.forJRE()
245                             .getCollatorProvider().getInstance(desiredLocale);
246            }
247            while (true) {
248                if (ref != null) {
249                    // Remove the empty SoftReference if any
250                    cache.remove(desiredLocale, ref);
251                }
252                ref = cache.putIfAbsent(desiredLocale, new SoftReference<>(result));
253                if (ref == null) {
254                    break;
255                }
256                Collator cachedColl = ref.get();
257                if (cachedColl != null) {
258                    result = cachedColl;
259                    break;
260                }
261            }
262        }
263        return (Collator) result.clone(); // make the world safe
264    }
265
266    /**
267     * Compares the source string to the target string according to the
268     * collation rules for this Collator.  Returns an integer less than,
269     * equal to or greater than zero depending on whether the source String is
270     * less than, equal to or greater than the target string.  See the Collator
271     * class description for an example of use.
272     * <p>
273     * For a one time comparison, this method has the best performance. If a
274     * given String will be involved in multiple comparisons, CollationKey.compareTo
275     * has the best performance. See the Collator class description for an example
276     * using CollationKeys.
277     * @param source the source string.
278     * @param target the target string.
279     * @return Returns an integer value. Value is less than zero if source is less than
280     * target, value is zero if source and target are equal, value is greater than zero
281     * if source is greater than target.
282     * @see java.text.CollationKey
283     * @see java.text.Collator#getCollationKey
284     */
285    public abstract int compare(String source, String target);
286
287    /**
288     * Compares its two arguments for order.  Returns a negative integer,
289     * zero, or a positive integer as the first argument is less than, equal
290     * to, or greater than the second.
291     * <p>
292     * This implementation merely returns
293     *  <code> compare((String)o1, (String)o2) </code>.
294     *
295     * @return a negative integer, zero, or a positive integer as the
296     *         first argument is less than, equal to, or greater than the
297     *         second.
298     * @exception ClassCastException the arguments cannot be cast to Strings.
299     * @see java.util.Comparator
300     * @since   1.2
301     */
302    @Override
303    public int compare(Object o1, Object o2) {
304    return compare((String)o1, (String)o2);
305    }
306
307    /**
308     * Transforms the String into a series of bits that can be compared bitwise
309     * to other CollationKeys. CollationKeys provide better performance than
310     * Collator.compare when Strings are involved in multiple comparisons.
311     * See the Collator class description for an example using CollationKeys.
312     * @param source the string to be transformed into a collation key.
313     * @return the CollationKey for the given String based on this Collator's collation
314     * rules. If the source String is null, a null CollationKey is returned.
315     * @see java.text.CollationKey
316     * @see java.text.Collator#compare
317     */
318    public abstract CollationKey getCollationKey(String source);
319
320    /**
321     * Convenience method for comparing the equality of two strings based on
322     * this Collator's collation rules.
323     * @param source the source string to be compared with.
324     * @param target the target string to be compared with.
325     * @return true if the strings are equal according to the collation
326     * rules.  false, otherwise.
327     * @see java.text.Collator#compare
328     */
329    public boolean equals(String source, String target)
330    {
331        return (compare(source, target) == Collator.EQUAL);
332    }
333
334    /**
335     * Returns this Collator's strength property.  The strength property determines
336     * the minimum level of difference considered significant during comparison.
337     * See the Collator class description for an example of use.
338     * @return this Collator's current strength property.
339     * @see java.text.Collator#setStrength
340     * @see java.text.Collator#PRIMARY
341     * @see java.text.Collator#SECONDARY
342     * @see java.text.Collator#TERTIARY
343     * @see java.text.Collator#IDENTICAL
344     */
345    public synchronized int getStrength()
346    {
347        return strength;
348    }
349
350    /**
351     * Sets this Collator's strength property.  The strength property determines
352     * the minimum level of difference considered significant during comparison.
353     * See the Collator class description for an example of use.
354     * @param newStrength  the new strength value.
355     * @see java.text.Collator#getStrength
356     * @see java.text.Collator#PRIMARY
357     * @see java.text.Collator#SECONDARY
358     * @see java.text.Collator#TERTIARY
359     * @see java.text.Collator#IDENTICAL
360     * @exception  IllegalArgumentException If the new strength value is not one of
361     * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
362     */
363    public synchronized void setStrength(int newStrength) {
364        if ((newStrength != PRIMARY) &&
365            (newStrength != SECONDARY) &&
366            (newStrength != TERTIARY) &&
367            (newStrength != IDENTICAL)) {
368            throw new IllegalArgumentException("Incorrect comparison level.");
369        }
370        strength = newStrength;
371    }
372
373    /**
374     * Get the decomposition mode of this Collator. Decomposition mode
375     * determines how Unicode composed characters are handled. Adjusting
376     * decomposition mode allows the user to select between faster and more
377     * complete collation behavior.
378     * <p>The three values for decomposition mode are:
379     * <UL>
380     * <LI>NO_DECOMPOSITION,
381     * <LI>CANONICAL_DECOMPOSITION
382     * <LI>FULL_DECOMPOSITION.
383     * </UL>
384     * See the documentation for these three constants for a description
385     * of their meaning.
386     * @return the decomposition mode
387     * @see java.text.Collator#setDecomposition
388     * @see java.text.Collator#NO_DECOMPOSITION
389     * @see java.text.Collator#CANONICAL_DECOMPOSITION
390     * @see java.text.Collator#FULL_DECOMPOSITION
391     */
392    public synchronized int getDecomposition()
393    {
394        return decmp;
395    }
396    /**
397     * Set the decomposition mode of this Collator. See getDecomposition
398     * for a description of decomposition mode.
399     * @param decompositionMode  the new decomposition mode.
400     * @see java.text.Collator#getDecomposition
401     * @see java.text.Collator#NO_DECOMPOSITION
402     * @see java.text.Collator#CANONICAL_DECOMPOSITION
403     * @see java.text.Collator#FULL_DECOMPOSITION
404     * @exception IllegalArgumentException If the given value is not a valid decomposition
405     * mode.
406     */
407    public synchronized void setDecomposition(int decompositionMode) {
408        if ((decompositionMode != NO_DECOMPOSITION) &&
409            (decompositionMode != CANONICAL_DECOMPOSITION) &&
410            (decompositionMode != FULL_DECOMPOSITION)) {
411            throw new IllegalArgumentException("Wrong decomposition mode.");
412        }
413        decmp = decompositionMode;
414    }
415
416    /**
417     * Returns an array of all locales for which the
418     * <code>getInstance</code> methods of this class can return
419     * localized instances.
420     * The returned array represents the union of locales supported
421     * by the Java runtime and by installed
422     * {@link java.text.spi.CollatorProvider CollatorProvider} implementations.
423     * It must contain at least a Locale instance equal to
424     * {@link java.util.Locale#US Locale.US}.
425     *
426     * @return An array of locales for which localized
427     *         <code>Collator</code> instances are available.
428     */
429    public static synchronized Locale[] getAvailableLocales() {
430        LocaleServiceProviderPool pool =
431            LocaleServiceProviderPool.getPool(CollatorProvider.class);
432        return pool.getAvailableLocales();
433    }
434
435    /**
436     * Overrides Cloneable
437     */
438    @Override
439    public Object clone()
440    {
441        try {
442            return (Collator)super.clone();
443        } catch (CloneNotSupportedException e) {
444            throw new InternalError(e);
445        }
446    }
447
448    /**
449     * Compares the equality of two Collators.
450     * @param that the Collator to be compared with this.
451     * @return true if this Collator is the same as that Collator;
452     * false otherwise.
453     */
454    @Override
455    public boolean equals(Object that)
456    {
457        if (this == that) {
458            return true;
459        }
460        if (that == null) {
461            return false;
462        }
463        if (getClass() != that.getClass()) {
464            return false;
465        }
466        Collator other = (Collator) that;
467        return ((strength == other.strength) &&
468                (decmp == other.decmp));
469    }
470
471    /**
472     * Generates the hash code for this Collator.
473     */
474    @Override
475    public abstract int hashCode();
476
477    /**
478     * Default constructor.  This constructor is
479     * protected so subclasses can get access to it. Users typically create
480     * a Collator sub-class by calling the factory method getInstance.
481     * @see java.text.Collator#getInstance
482     */
483    protected Collator()
484    {
485        strength = TERTIARY;
486        decmp = CANONICAL_DECOMPOSITION;
487    }
488
489    private int strength = 0;
490    private int decmp = 0;
491    private static final ConcurrentMap<Locale, SoftReference<Collator>> cache
492            = new ConcurrentHashMap<>();
493
494    //
495    // FIXME: These three constants should be removed.
496    //
497    /**
498     * LESS is returned if source string is compared to be less than target
499     * string in the compare() method.
500     * @see java.text.Collator#compare
501     */
502    static final int LESS = -1;
503    /**
504     * EQUAL is returned if source string is compared to be equal to target
505     * string in the compare() method.
506     * @see java.text.Collator#compare
507     */
508    static final int EQUAL = 0;
509    /**
510     * GREATER is returned if source string is compared to be greater than
511     * target string in the compare() method.
512     * @see java.text.Collator#compare
513     */
514    static final int GREATER = 1;
515 }
516