// RBCollationTables.java revision 12745:f068a4ffddd2
/*
 * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
 *
 *   The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 *   Taligent is a registered trademark of Taligent, Inc.
 *
 */

package java.text;

import java.util.Vector;
import sun.text.UCompactIntArray;
import sun.text.IntHashtable;

4547831Sfoxfair/**
4647831Sfoxfair * This class contains the static state of a RuleBasedCollator: The various
4747831Sfoxfair * tables that are used by the collation routines.  Several RuleBasedCollators
4847831Sfoxfair * can share a single RBCollationTables object, easing memory requirements and
4947831Sfoxfair * improving performance.
5047831Sfoxfair */
5147831Sfoxfairfinal class RBCollationTables {
5247831Sfoxfair    //===========================================================================================
5347831Sfoxfair    //  The following diagram shows the data structure of the RBCollationTables object.
5447831Sfoxfair    //  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
5547831Sfoxfair    //  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
5647831Sfoxfair    //  What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and
5747831Sfoxfair    //  sorts 'o-umlaut' as if it's always expanded with 'e'.
5847831Sfoxfair    //
5947831Sfoxfair    // mapping table                     contracting list           expanding list
6047831Sfoxfair    // (contains all unicode char
6174570Sache    //  entries)                   ___    ____________       _________________________
6247831Sfoxfair    //  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
6347831Sfoxfair    // |_\u0001_|-> v('\u0001') | |_:_|  |------------|  | |-------------------------|
6447831Sfoxfair    // |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  | |             :           |
6554090Sache    // |____:___|               | |_:_|  |------------|  | |-------------------------|
6647831Sfoxfair    // |____:___|               |        |'cH'|v('cH')|  | |             :           |
6747831Sfoxfair    // |__'a'___|-> v('a')      |        |------------|  | |-------------------------|
6847831Sfoxfair    // |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  | |             :           |
6947831Sfoxfair    // |____:___|               |        |------------|  | |-------------------------|
7047831Sfoxfair    // |____:___|               |        |'CH'|v('CH')|  | |             :           |
7147831Sfoxfair    // |___'c'__|----------------         ------------   | |-------------------------|
7247831Sfoxfair    // |____:___|                                        | |             :           |
7347831Sfoxfair    // |o-umlaut|----------------------------------------  |_________________________|
7447831Sfoxfair    // |____:___|
7547831Sfoxfair    //
7647831Sfoxfair    // Noted by Helena Shih on 6/23/97
77290150Sdelphij    //============================================================================================
7853943Sache
79174990Sache    public RBCollationTables(String rules, int decmp) throws ParseException {
8053943Sache        this.rules = rules;
8153943Sache
8253943Sache        RBTableBuilder builder = new RBTableBuilder(new BuildAPI());
8353943Sache        builder.build(rules, decmp); // this object is filled in through
8453943Sache                                            // the BuildAPI object
8553943Sache    }
8653943Sache
8753943Sache    final class BuildAPI {
8853943Sache        /**
8953943Sache         * Private constructor.  Prevents anyone else besides RBTableBuilder
9053943Sache         * from gaining direct access to the internals of this class.
9153943Sache         */
9253943Sache        private BuildAPI() {
9353943Sache        }
9474413Sache
9553943Sache        /**
9674413Sache         * This function is used by RBTableBuilder to fill in all the members of this
9753961Sache         * object.  (Effectively, the builder class functions as a "friend" of this
9874413Sache         * class, but to avoid changing too much of the logic, it carries around "shadow"
9953961Sache         * copies of all these variables until the end of the build process and then
10074413Sache         * copies them en masse into the actual tables object once all the construction
10174413Sache         * logic is complete.  This function does that "copying en masse".
102         * @param f2ary The value for frenchSec (the French-secondary flag)
103         * @param swap The value for SE Asian swapping rule
104         * @param map The collator's character-mapping table (the value for mapping)
105         * @param cTbl The collator's contracting-character table (the value for contractTable)
106         * @param eTbl The collator's expanding-character table (the value for expandTable)
107         * @param cFlgs The hash table of characters that participate in contracting-
108         *              character sequences (the value for contractFlags)
109         * @param mso The value for maxSecOrder
110         * @param mto The value for maxTerOrder
111         */
112        void fillInTables(boolean f2ary,
113                          boolean swap,
114                          UCompactIntArray map,
115                          Vector<Vector<EntryPair>> cTbl,
116                          Vector<int[]> eTbl,
117                          IntHashtable cFlgs,
118                          short mso,
119                          short mto) {
120            frenchSec = f2ary;
121            seAsianSwapping = swap;
122            mapping = map;
123            contractTable = cTbl;
124            expandTable = eTbl;
125            contractFlags = cFlgs;
126            maxSecOrder = mso;
127            maxTerOrder = mto;
128        }
129    }
130
131    /**
132     * Gets the table-based rules for the collation object.
133     * @return returns the collation rules that the table collation object
134     * was created from.
135     */
136    public String getRules()
137    {
138        return rules;
139    }
140
141    public boolean isFrenchSec() {
142        return frenchSec;
143    }
144
145    public boolean isSEAsianSwapping() {
146        return seAsianSwapping;
147    }
148
149    // ==============================================================
150    // internal (for use by CollationElementIterator)
151    // ==============================================================
152
153    /**
154     *  Get the entry of hash table of the contracting string in the collation
155     *  table.
156     *  @param ch the starting character of the contracting string
157     */
158    Vector<EntryPair> getContractValues(int ch)
159    {
160        int index = mapping.elementAt(ch);
161        return getContractValuesImpl(index - CONTRACTCHARINDEX);
162    }
163
164    //get contract values from contractTable by index
165    private Vector<EntryPair> getContractValuesImpl(int index)
166    {
167        if (index >= 0)
168        {
169            return contractTable.elementAt(index);
170        }
171        else // not found
172        {
173            return null;
174        }
175    }
176
177    /**
178     * Returns true if this character appears anywhere in a contracting
179     * character sequence.  (Used by CollationElementIterator.setOffset().)
180     */
181    boolean usedInContractSeq(int c) {
182        return contractFlags.get(c) == 1;
183    }
184
185    /**
186      * Return the maximum length of any expansion sequences that end
187      * with the specified comparison order.
188      *
189      * @param order a collation order returned by previous or next.
190      * @return the maximum length of any expansion seuences ending
191      *         with the specified order.
192      *
193      * @see CollationElementIterator#getMaxExpansion
194      */
195    int getMaxExpansion(int order) {
196        int result = 1;
197
198        if (expandTable != null) {
199            // Right now this does a linear search through the entire
200            // expansion table.  If a collator had a large number of expansions,
201            // this could cause a performance problem, but in practise that
202            // rarely happens
203            for (int i = 0; i < expandTable.size(); i++) {
204                int[] valueList = expandTable.elementAt(i);
205                int length = valueList.length;
206
207                if (length > result && valueList[length-1] == order) {
208                    result = length;
209                }
210            }
211        }
212
213        return result;
214    }
215
216    /**
217     * Get the entry of hash table of the expanding string in the collation
218     * table.
219     * @param idx the index of the expanding string value list
220     */
221    final int[] getExpandValueList(int idx) {
222        return expandTable.elementAt(idx - EXPANDCHARINDEX);
223    }
224
225    /**
226     * Get the comarison order of a character from the collation table.
227     * @return the comparison order of a character.
228     */
229    int getUnicodeOrder(int ch) {
230        return mapping.elementAt(ch);
231    }
232
233    short getMaxSecOrder() {
234        return maxSecOrder;
235    }
236
237    short getMaxTerOrder() {
238        return maxTerOrder;
239    }
240
241    /**
242     * Reverse a string.
243     */
244    //shemran/Note: this is used for secondary order value reverse, no
245    //              need to consider supplementary pair.
246    static void reverse (StringBuffer result, int from, int to)
247    {
248        int i = from;
249        char swap;
250
251        int j = to - 1;
252        while (i < j) {
253            swap =  result.charAt(i);
254            result.setCharAt(i, result.charAt(j));
255            result.setCharAt(j, swap);
256            i++;
257            j--;
258        }
259    }
260
261    static final int getEntry(Vector<EntryPair> list, String name, boolean fwd) {
262        for (int i = 0; i < list.size(); i++) {
263            EntryPair pair = list.elementAt(i);
264            if (pair.fwd == fwd && pair.entryName.equals(name)) {
265                return i;
266            }
267        }
268        return UNMAPPED;
269    }
270
271    // ==============================================================
272    // constants
273    // ==============================================================
274    //sherman/Todo: is the value big enough?????
275    static final int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
276    static final int CONTRACTCHARINDEX = 0x7F000000;  // contract indexes follow
277    static final int UNMAPPED = 0xFFFFFFFF;
278
279    static final int PRIMARYORDERMASK = 0xffff0000;
280    static final int SECONDARYORDERMASK = 0x0000ff00;
281    static final int TERTIARYORDERMASK = 0x000000ff;
282    static final int PRIMARYDIFFERENCEONLY = 0xffff0000;
283    static final int SECONDARYDIFFERENCEONLY = 0xffffff00;
284    static final int PRIMARYORDERSHIFT = 16;
285    static final int SECONDARYORDERSHIFT = 8;
286
287    // ==============================================================
288    // instance variables
289    // ==============================================================
290    private String rules = null;
291    private boolean frenchSec = false;
292    private boolean seAsianSwapping = false;
293
294    private UCompactIntArray mapping = null;
295    private Vector<Vector<EntryPair>> contractTable = null;
296    private Vector<int[]> expandTable = null;
297    private IntHashtable contractFlags = null;
298
299    private short maxSecOrder = 0;
300    private short maxTerOrder = 0;
301}
302