CollationElementIterator.java revision 12745:f068a4ffddd2
1/*
2 * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26/*
27 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29 *
30 *   The original version of this source code and documentation is copyrighted
31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32 * materials are provided under terms of a License Agreement between Taligent
33 * and Sun. This technology is protected by multiple US and International
34 * patents. This notice and attribution to Taligent may not be removed.
35 *   Taligent is a registered trademark of Taligent, Inc.
36 *
37 */
38
39package java.text;
40
41import java.lang.Character;
42import java.util.Vector;
43import sun.text.CollatorUtilities;
44import sun.text.normalizer.NormalizerBase;
45
46/**
47 * The <code>CollationElementIterator</code> class is used as an iterator
48 * to walk through each character of an international string. Use the iterator
49 * to return the ordering priority of the positioned character. The ordering
50 * priority of a character, which we refer to as a key, defines how a character
51 * is collated in the given collation object.
52 *
53 * <p>
54 * For example, consider the following in Spanish:
55 * <blockquote>
56 * <pre>
57 * "ca" &rarr; the first key is key('c') and second key is key('a').
58 * "cha" &rarr; the first key is key('ch') and second key is key('a').
59 * </pre>
60 * </blockquote>
61 * And in German,
62 * <blockquote>
63 * <pre>
64 * "\u00e4b" &rarr; the first key is key('a'), the second key is key('e'), and
65 * the third key is key('b').
66 * </pre>
67 * </blockquote>
68 * The key of a character is an integer composed of primary order(short),
69 * secondary order(byte), and tertiary order(byte). Java strictly defines
70 * the size and signedness of its primitive data types. Therefore, the static
71 * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
72 * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
73 * and <code>short</code> respectively to ensure the correctness of the key
74 * value.
75 *
76 * <p>
77 * Example of the iterator usage,
78 * <blockquote>
79 * <pre>
80 *
81 *  String testString = "This is a test";
82 *  Collator col = Collator.getInstance();
83 *  if (col instanceof RuleBasedCollator) {
84 *      RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col;
85 *      CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
86 *      int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
87 *          :
88 *  }
89 * </pre>
90 * </blockquote>
91 *
92 * <p>
93 * <code>CollationElementIterator.next</code> returns the collation order
94 * of the next character. A collation order consists of primary order,
95 * secondary order and tertiary order. The data type of the collation
96 * order is <strong>int</strong>. The first 16 bits of a collation order
97 * is its primary order; the next 8 bits is the secondary order and the
98 * last 8 bits is the tertiary order.
99 *
100 * <p><b>Note:</b> <code>CollationElementIterator</code> is a part of
101 * <code>RuleBasedCollator</code> implementation. It is only usable
102 * with <code>RuleBasedCollator</code> instances.
103 *
104 * @see                Collator
105 * @see                RuleBasedCollator
106 * @author             Helena Shih, Laura Werner, Richard Gillam
107 */
108public final class CollationElementIterator
109{
110    /**
111     * Null order which indicates the end of string is reached by the
112     * cursor.
113     */
114    public static final int NULLORDER = 0xffffffff;
115
116    /**
117     * CollationElementIterator constructor.  This takes the source string and
118     * the collation object.  The cursor will walk thru the source string based
119     * on the predefined collation rules.  If the source string is empty,
120     * NULLORDER will be returned on the calls to next().
121     * @param sourceText the source string.
122     * @param owner the collation object.
123     */
124    CollationElementIterator(String sourceText, RuleBasedCollator owner) {
125        this.owner = owner;
126        ordering = owner.getTables();
127        if ( sourceText.length() != 0 ) {
128            NormalizerBase.Mode mode =
129                CollatorUtilities.toNormalizerMode(owner.getDecomposition());
130            text = new NormalizerBase(sourceText, mode);
131        }
132    }
133
134    /**
135     * CollationElementIterator constructor.  This takes the source string and
136     * the collation object.  The cursor will walk thru the source string based
137     * on the predefined collation rules.  If the source string is empty,
138     * NULLORDER will be returned on the calls to next().
139     * @param sourceText the source string.
140     * @param owner the collation object.
141     */
142    CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) {
143        this.owner = owner;
144        ordering = owner.getTables();
145        NormalizerBase.Mode mode =
146            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
147        text = new NormalizerBase(sourceText, mode);
148    }
149
150    /**
151     * Resets the cursor to the beginning of the string.  The next call
152     * to next() will return the first collation element in the string.
153     */
154    public void reset()
155    {
156        if (text != null) {
157            text.reset();
158            NormalizerBase.Mode mode =
159                CollatorUtilities.toNormalizerMode(owner.getDecomposition());
160            text.setMode(mode);
161        }
162        buffer = null;
163        expIndex = 0;
164        swapOrder = 0;
165    }
166
167    /**
168     * Get the next collation element in the string.  <p>This iterator iterates
169     * over a sequence of collation elements that were built from the string.
170     * Because there isn't necessarily a one-to-one mapping from characters to
171     * collation elements, this doesn't mean the same thing as "return the
172     * collation element [or ordering priority] of the next character in the
173     * string".</p>
174     * <p>This function returns the collation element that the iterator is currently
175     * pointing to and then updates the internal pointer to point to the next element.
176     * previous() updates the pointer first and then returns the element.  This
177     * means that when you change direction while iterating (i.e., call next() and
178     * then call previous(), or call previous() and then call next()), you'll get
179     * back the same element twice.</p>
180     *
181     * @return the next collation element
182     */
183    public int next()
184    {
185        if (text == null) {
186            return NULLORDER;
187        }
188        NormalizerBase.Mode textMode = text.getMode();
189        // convert the owner's mode to something the Normalizer understands
190        NormalizerBase.Mode ownerMode =
191            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
192        if (textMode != ownerMode) {
193            text.setMode(ownerMode);
194        }
195
196        // if buffer contains any decomposed char values
197        // return their strength orders before continuing in
198        // the Normalizer's CharacterIterator.
199        if (buffer != null) {
200            if (expIndex < buffer.length) {
201                return strengthOrder(buffer[expIndex++]);
202            } else {
203                buffer = null;
204                expIndex = 0;
205            }
206        } else if (swapOrder != 0) {
207            if (Character.isSupplementaryCodePoint(swapOrder)) {
208                char[] chars = Character.toChars(swapOrder);
209                swapOrder = chars[1];
210                return chars[0] << 16;
211            }
212            int order = swapOrder << 16;
213            swapOrder = 0;
214            return order;
215        }
216        int ch  = text.next();
217
218        // are we at the end of Normalizer's text?
219        if (ch == NormalizerBase.DONE) {
220            return NULLORDER;
221        }
222
223        int value = ordering.getUnicodeOrder(ch);
224        if (value == RuleBasedCollator.UNMAPPED) {
225            swapOrder = ch;
226            return UNMAPPEDCHARVALUE;
227        }
228        else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
229            value = nextContractChar(ch);
230        }
231        if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
232            buffer = ordering.getExpandValueList(value);
233            expIndex = 0;
234            value = buffer[expIndex++];
235        }
236
237        if (ordering.isSEAsianSwapping()) {
238            int consonant;
239            if (isThaiPreVowel(ch)) {
240                consonant = text.next();
241                if (isThaiBaseConsonant(consonant)) {
242                    buffer = makeReorderedBuffer(consonant, value, buffer, true);
243                    value = buffer[0];
244                    expIndex = 1;
245                } else if (consonant != NormalizerBase.DONE) {
246                    text.previous();
247                }
248            }
249            if (isLaoPreVowel(ch)) {
250                consonant = text.next();
251                if (isLaoBaseConsonant(consonant)) {
252                    buffer = makeReorderedBuffer(consonant, value, buffer, true);
253                    value = buffer[0];
254                    expIndex = 1;
255                } else if (consonant != NormalizerBase.DONE) {
256                    text.previous();
257                }
258            }
259        }
260
261        return strengthOrder(value);
262    }
263
264    /**
265     * Get the previous collation element in the string.  <p>This iterator iterates
266     * over a sequence of collation elements that were built from the string.
267     * Because there isn't necessarily a one-to-one mapping from characters to
268     * collation elements, this doesn't mean the same thing as "return the
269     * collation element [or ordering priority] of the previous character in the
270     * string".</p>
271     * <p>This function updates the iterator's internal pointer to point to the
272     * collation element preceding the one it's currently pointing to and then
273     * returns that element, while next() returns the current element and then
274     * updates the pointer.  This means that when you change direction while
275     * iterating (i.e., call next() and then call previous(), or call previous()
276     * and then call next()), you'll get back the same element twice.</p>
277     *
278     * @return the previous collation element
279     * @since 1.2
280     */
281    public int previous()
282    {
283        if (text == null) {
284            return NULLORDER;
285        }
286        NormalizerBase.Mode textMode = text.getMode();
287        // convert the owner's mode to something the Normalizer understands
288        NormalizerBase.Mode ownerMode =
289            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
290        if (textMode != ownerMode) {
291            text.setMode(ownerMode);
292        }
293        if (buffer != null) {
294            if (expIndex > 0) {
295                return strengthOrder(buffer[--expIndex]);
296            } else {
297                buffer = null;
298                expIndex = 0;
299            }
300        } else if (swapOrder != 0) {
301            if (Character.isSupplementaryCodePoint(swapOrder)) {
302                char[] chars = Character.toChars(swapOrder);
303                swapOrder = chars[1];
304                return chars[0] << 16;
305            }
306            int order = swapOrder << 16;
307            swapOrder = 0;
308            return order;
309        }
310        int ch = text.previous();
311        if (ch == NormalizerBase.DONE) {
312            return NULLORDER;
313        }
314
315        int value = ordering.getUnicodeOrder(ch);
316
317        if (value == RuleBasedCollator.UNMAPPED) {
318            swapOrder = UNMAPPEDCHARVALUE;
319            return ch;
320        } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
321            value = prevContractChar(ch);
322        }
323        if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
324            buffer = ordering.getExpandValueList(value);
325            expIndex = buffer.length;
326            value = buffer[--expIndex];
327        }
328
329        if (ordering.isSEAsianSwapping()) {
330            int vowel;
331            if (isThaiBaseConsonant(ch)) {
332                vowel = text.previous();
333                if (isThaiPreVowel(vowel)) {
334                    buffer = makeReorderedBuffer(vowel, value, buffer, false);
335                    expIndex = buffer.length - 1;
336                    value = buffer[expIndex];
337                } else {
338                    text.next();
339                }
340            }
341            if (isLaoBaseConsonant(ch)) {
342                vowel = text.previous();
343                if (isLaoPreVowel(vowel)) {
344                    buffer = makeReorderedBuffer(vowel, value, buffer, false);
345                    expIndex = buffer.length - 1;
346                    value = buffer[expIndex];
347                } else {
348                    text.next();
349                }
350            }
351        }
352
353        return strengthOrder(value);
354    }
355
356    /**
357     * Return the primary component of a collation element.
358     * @param order the collation element
359     * @return the element's primary component
360     */
361    public static final int primaryOrder(int order)
362    {
363        order &= RBCollationTables.PRIMARYORDERMASK;
364        return (order >>> RBCollationTables.PRIMARYORDERSHIFT);
365    }
366    /**
367     * Return the secondary component of a collation element.
368     * @param order the collation element
369     * @return the element's secondary component
370     */
371    public static final short secondaryOrder(int order)
372    {
373        order = order & RBCollationTables.SECONDARYORDERMASK;
374        return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT));
375    }
376    /**
377     * Return the tertiary component of a collation element.
378     * @param order the collation element
379     * @return the element's tertiary component
380     */
381    public static final short tertiaryOrder(int order)
382    {
383        return ((short)(order &= RBCollationTables.TERTIARYORDERMASK));
384    }
385
386    /**
387     *  Get the comparison order in the desired strength.  Ignore the other
388     *  differences.
389     *  @param order The order value
390     */
391    final int strengthOrder(int order)
392    {
393        int s = owner.getStrength();
394        if (s == Collator.PRIMARY)
395        {
396            order &= RBCollationTables.PRIMARYDIFFERENCEONLY;
397        } else if (s == Collator.SECONDARY)
398        {
399            order &= RBCollationTables.SECONDARYDIFFERENCEONLY;
400        }
401        return order;
402    }
403
404    /**
405     * Sets the iterator to point to the collation element corresponding to
406     * the specified character (the parameter is a CHARACTER offset in the
407     * original string, not an offset into its corresponding sequence of
408     * collation elements).  The value returned by the next call to next()
409     * will be the collation element corresponding to the specified position
410     * in the text.  If that position is in the middle of a contracting
411     * character sequence, the result of the next call to next() is the
412     * collation element for that sequence.  This means that getOffset()
413     * is not guaranteed to return the same value as was passed to a preceding
414     * call to setOffset().
415     *
416     * @param newOffset The new character offset into the original text.
417     * @since 1.2
418     */
419    @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated
420    public void setOffset(int newOffset)
421    {
422        if (text != null) {
423            if (newOffset < text.getBeginIndex()
424                || newOffset >= text.getEndIndex()) {
425                    text.setIndexOnly(newOffset);
426            } else {
427                int c = text.setIndex(newOffset);
428
429                // if the desired character isn't used in a contracting character
430                // sequence, bypass all the backing-up logic-- we're sitting on
431                // the right character already
432                if (ordering.usedInContractSeq(c)) {
433                    // walk backwards through the string until we see a character
434                    // that DOESN'T participate in a contracting character sequence
435                    while (ordering.usedInContractSeq(c)) {
436                        c = text.previous();
437                    }
438                    // now walk forward using this object's next() method until
439                    // we pass the starting point and set our current position
440                    // to the beginning of the last "character" before or at
441                    // our starting position
442                    int last = text.getIndex();
443                    while (text.getIndex() <= newOffset) {
444                        last = text.getIndex();
445                        next();
446                    }
447                    text.setIndexOnly(last);
448                    // we don't need this, since last is the last index
449                    // that is the starting of the contraction which encompass
450                    // newOffset
451                    // text.previous();
452                }
453            }
454        }
455        buffer = null;
456        expIndex = 0;
457        swapOrder = 0;
458    }
459
460    /**
461     * Returns the character offset in the original text corresponding to the next
462     * collation element.  (That is, getOffset() returns the position in the text
463     * corresponding to the collation element that will be returned by the next
464     * call to next().)  This value will always be the index of the FIRST character
465     * corresponding to the collation element (a contracting character sequence is
466     * when two or more characters all correspond to the same collation element).
467     * This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
468     * won't necessarily return x.
469     *
470     * @return The character offset in the original text corresponding to the collation
471     * element that will be returned by the next call to next().
472     * @since 1.2
473     */
474    public int getOffset()
475    {
476        return (text != null) ? text.getIndex() : 0;
477    }
478
479
480    /**
481     * Return the maximum length of any expansion sequences that end
482     * with the specified comparison order.
483     * @param order a collation order returned by previous or next.
484     * @return the maximum length of any expansion sequences ending
485     *         with the specified order.
486     * @since 1.2
487     */
488    public int getMaxExpansion(int order)
489    {
490        return ordering.getMaxExpansion(order);
491    }
492
493    /**
494     * Set a new string over which to iterate.
495     *
496     * @param source  the new source text
497     * @since 1.2
498     */
499    public void setText(String source)
500    {
501        buffer = null;
502        swapOrder = 0;
503        expIndex = 0;
504        NormalizerBase.Mode mode =
505            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
506        if (text == null) {
507            text = new NormalizerBase(source, mode);
508        } else {
509            text.setMode(mode);
510            text.setText(source);
511        }
512    }
513
514    /**
515     * Set a new string over which to iterate.
516     *
517     * @param source  the new source text.
518     * @since 1.2
519     */
520    public void setText(CharacterIterator source)
521    {
522        buffer = null;
523        swapOrder = 0;
524        expIndex = 0;
525        NormalizerBase.Mode mode =
526            CollatorUtilities.toNormalizerMode(owner.getDecomposition());
527        if (text == null) {
528            text = new NormalizerBase(source, mode);
529        } else {
530            text.setMode(mode);
531            text.setText(source);
532        }
533    }
534
535    //============================================================
536    // privates
537    //============================================================
538
539    /**
540     * Determine if a character is a Thai vowel (which sorts after
541     * its base consonant).
542     */
543    private static final boolean isThaiPreVowel(int ch) {
544        return (ch >= 0x0e40) && (ch <= 0x0e44);
545    }
546
547    /**
548     * Determine if a character is a Thai base consonant
549     */
550    private static final boolean isThaiBaseConsonant(int ch) {
551        return (ch >= 0x0e01) && (ch <= 0x0e2e);
552    }
553
554    /**
555     * Determine if a character is a Lao vowel (which sorts after
556     * its base consonant).
557     */
558    private static final boolean isLaoPreVowel(int ch) {
559        return (ch >= 0x0ec0) && (ch <= 0x0ec4);
560    }
561
562    /**
563     * Determine if a character is a Lao base consonant
564     */
565    private static final boolean isLaoBaseConsonant(int ch) {
566        return (ch >= 0x0e81) && (ch <= 0x0eae);
567    }
568
569    /**
570     * This method produces a buffer which contains the collation
571     * elements for the two characters, with colFirst's values preceding
572     * another character's.  Presumably, the other character precedes colFirst
573     * in logical order (otherwise you wouldn't need this method would you?).
574     * The assumption is that the other char's value(s) have already been
575     * computed.  If this char has a single element it is passed to this
576     * method as lastValue, and lastExpansion is null.  If it has an
577     * expansion it is passed in lastExpansion, and colLastValue is ignored.
578     */
579    private int[] makeReorderedBuffer(int colFirst,
580                                      int lastValue,
581                                      int[] lastExpansion,
582                                      boolean forward) {
583
584        int[] result;
585
586        int firstValue = ordering.getUnicodeOrder(colFirst);
587        if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) {
588            firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst);
589        }
590
591        int[] firstExpansion = null;
592        if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) {
593            firstExpansion = ordering.getExpandValueList(firstValue);
594        }
595
596        if (!forward) {
597            int temp1 = firstValue;
598            firstValue = lastValue;
599            lastValue = temp1;
600            int[] temp2 = firstExpansion;
601            firstExpansion = lastExpansion;
602            lastExpansion = temp2;
603        }
604
605        if (firstExpansion == null && lastExpansion == null) {
606            result = new int [2];
607            result[0] = firstValue;
608            result[1] = lastValue;
609        }
610        else {
611            int firstLength = firstExpansion==null? 1 : firstExpansion.length;
612            int lastLength = lastExpansion==null? 1 : lastExpansion.length;
613            result = new int[firstLength + lastLength];
614
615            if (firstExpansion == null) {
616                result[0] = firstValue;
617            }
618            else {
619                System.arraycopy(firstExpansion, 0, result, 0, firstLength);
620            }
621
622            if (lastExpansion == null) {
623                result[firstLength] = lastValue;
624            }
625            else {
626                System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
627            }
628        }
629
630        return result;
631    }
632
633    /**
634     *  Check if a comparison order is ignorable.
635     *  @return true if a character is ignorable, false otherwise.
636     */
637    static final boolean isIgnorable(int order)
638    {
639        return ((primaryOrder(order) == 0) ? true : false);
640    }
641
642    /**
643     * Get the ordering priority of the next contracting character in the
644     * string.
645     * @param ch the starting character of a contracting character token
646     * @return the next contracting character's ordering.  Returns NULLORDER
647     * if the end of string is reached.
648     */
649    private int nextContractChar(int ch)
650    {
651        // First get the ordering of this single character,
652        // which is always the first element in the list
653        Vector<EntryPair> list = ordering.getContractValues(ch);
654        EntryPair pair = list.firstElement();
655        int order = pair.value;
656
657        // find out the length of the longest contracting character sequence in the list.
658        // There's logic in the builder code to make sure the longest sequence is always
659        // the last.
660        pair = list.lastElement();
661        int maxLength = pair.entryName.length();
662
663        // (the Normalizer is cloned here so that the seeking we do in the next loop
664        // won't affect our real position in the text)
665        NormalizerBase tempText = (NormalizerBase)text.clone();
666
667        // extract the next maxLength characters in the string (we have to do this using the
668        // Normalizer to ensure that our offsets correspond to those the rest of the
669        // iterator is using) and store it in "fragment".
670        tempText.previous();
671        key.setLength(0);
672        int c = tempText.next();
673        while (maxLength > 0 && c != NormalizerBase.DONE) {
674            if (Character.isSupplementaryCodePoint(c)) {
675                key.append(Character.toChars(c));
676                maxLength -= 2;
677            } else {
678                key.append((char)c);
679                --maxLength;
680            }
681            c = tempText.next();
682        }
683        String fragment = key.toString();
684        // now that we have that fragment, iterate through this list looking for the
685        // longest sequence that matches the characters in the actual text.  (maxLength
686        // is used here to keep track of the length of the longest sequence)
687        // Upon exit from this loop, maxLength will contain the length of the matching
688        // sequence and order will contain the collation-element value corresponding
689        // to this sequence
690        maxLength = 1;
691        for (int i = list.size() - 1; i > 0; i--) {
692            pair = list.elementAt(i);
693            if (!pair.fwd)
694                continue;
695
696            if (fragment.startsWith(pair.entryName) && pair.entryName.length()
697                    > maxLength) {
698                maxLength = pair.entryName.length();
699                order = pair.value;
700            }
701        }
702
703        // seek our current iteration position to the end of the matching sequence
704        // and return the appropriate collation-element value (if there was no matching
705        // sequence, we're already seeked to the right position and order already contains
706        // the correct collation-element value for the single character)
707        while (maxLength > 1) {
708            c = text.next();
709            maxLength -= Character.charCount(c);
710        }
711        return order;
712    }
713
714    /**
715     * Get the ordering priority of the previous contracting character in the
716     * string.
717     * @param ch the starting character of a contracting character token
718     * @return the next contracting character's ordering.  Returns NULLORDER
719     * if the end of string is reached.
720     */
721    private int prevContractChar(int ch)
722    {
723        // This function is identical to nextContractChar(), except that we've
724        // switched things so that the next() and previous() calls on the Normalizer
725        // are switched and so that we skip entry pairs with the fwd flag turned on
726        // rather than off.  Notice that we still use append() and startsWith() when
727        // working on the fragment.  This is because the entry pairs that are used
728        // in reverse iteration have their names reversed already.
729        Vector<EntryPair> list = ordering.getContractValues(ch);
730        EntryPair pair = list.firstElement();
731        int order = pair.value;
732
733        pair = list.lastElement();
734        int maxLength = pair.entryName.length();
735
736        NormalizerBase tempText = (NormalizerBase)text.clone();
737
738        tempText.next();
739        key.setLength(0);
740        int c = tempText.previous();
741        while (maxLength > 0 && c != NormalizerBase.DONE) {
742            if (Character.isSupplementaryCodePoint(c)) {
743                key.append(Character.toChars(c));
744                maxLength -= 2;
745            } else {
746                key.append((char)c);
747                --maxLength;
748            }
749            c = tempText.previous();
750        }
751        String fragment = key.toString();
752
753        maxLength = 1;
754        for (int i = list.size() - 1; i > 0; i--) {
755            pair = list.elementAt(i);
756            if (pair.fwd)
757                continue;
758
759            if (fragment.startsWith(pair.entryName) && pair.entryName.length()
760                    > maxLength) {
761                maxLength = pair.entryName.length();
762                order = pair.value;
763            }
764        }
765
766        while (maxLength > 1) {
767            c = text.previous();
768            maxLength -= Character.charCount(c);
769        }
770        return order;
771    }
772
773    static final int UNMAPPEDCHARVALUE = 0x7FFF0000;
774
775    private NormalizerBase text = null;
776    private int[] buffer = null;
777    private int expIndex = 0;
778    private StringBuffer key = new StringBuffer(5);
779    private int swapOrder = 0;
780    private RBCollationTables ordering;
781    private RuleBasedCollator owner;
782}
783