CharacterData00.java.template revision 8845:4be14673b9bf
1/*
2 * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
/**
 * The CharacterData00 class encapsulates the large tables once found in
 * java.lang.Character.
 */
32
33class CharacterData00 extends CharacterData {
34    /* The character properties are currently encoded into 32 bits in the following manner:
35        1 bit   mirrored property
36        4 bits  directionality property
37        9 bits  signed offset used for converting case
38        1 bit   if 1, adding the signed offset converts the character to lowercase
39        1 bit   if 1, subtracting the signed offset converts the character to uppercase
40        1 bit   if 1, this character has a titlecase equivalent (possibly itself)
41        3 bits  0  may not be part of an identifier
42                1  ignorable control; may continue a Unicode identifier or Java identifier
43                2  may continue a Java identifier but not a Unicode identifier (unused)
44                3  may continue a Unicode identifier or Java identifier
45                4  is a Java whitespace character
46                5  may start or continue a Java identifier;
47                   may continue but not start a Unicode identifier (underscores)
48                6  may start or continue a Java identifier but not a Unicode identifier ($)
49                7  may start or continue a Unicode identifier or Java identifier
50                Thus:
51                   5, 6, 7 may start a Java identifier
52                   1, 2, 3, 5, 6, 7 may continue a Java identifier
53                   7 may start a Unicode identifier
54                   1, 3, 5, 7 may continue a Unicode identifier
55                   1 is ignorable within an identifier
56                   4 is Java whitespace
57        2 bits  0  this character has no numeric property
58                1  adding the digit offset to the character code and then
59                   masking with 0x1F will produce the desired numeric value
60                2  this character has a "strange" numeric value
61                3  a Java supradecimal digit: adding the digit offset to the
62                   character code, then masking with 0x1F, then adding 10
63                   will produce the desired numeric value
64        5 bits  digit offset
65        5 bits  character type
66
67        The encoding of character properties is subject to change at any time.
68     */
69
70    int getProperties(int ch) {
71        char offset = (char)ch;
72        int props = $$Lookup(offset);
73        return props;
74    }
75
76    int getPropertiesEx(int ch) {
77        char offset = (char)ch;
78        int props = $$LookupEx(offset);
79        return props;
80    }
81
82    int getType(int ch) {
83        int props = getProperties(ch);
84        return (props & $$maskType);
85    }
86
87    boolean isOtherLowercase(int ch) {
88        int props = getPropertiesEx(ch);
89        return (props & $$maskOtherLowercase) != 0;
90    }
91
92    boolean isOtherUppercase(int ch) {
93        int props = getPropertiesEx(ch);
94        return (props & $$maskOtherUppercase) != 0;
95    }
96
97    boolean isOtherAlphabetic(int ch) {
98        int props = getPropertiesEx(ch);
99        return (props & $$maskOtherAlphabetic) != 0;
100    }
101
102    boolean isIdeographic(int ch) {
103        int props = getPropertiesEx(ch);
104        return (props & $$maskIdeographic) != 0;
105    }
106
107    boolean isJavaIdentifierStart(int ch) {
108        int props = getProperties(ch);
109        return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
110    }
111
112    boolean isJavaIdentifierPart(int ch) {
113        int props = getProperties(ch);
114        return ((props & $$nonzeroJavaPart) != 0);
115    }
116
117    boolean isUnicodeIdentifierStart(int ch) {
118        int props = getProperties(ch);
119        return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
120    }
121
122    boolean isUnicodeIdentifierPart(int ch) {
123        int props = getProperties(ch);
124        return ((props & $$maskUnicodePart) != 0);
125    }
126
127    boolean isIdentifierIgnorable(int ch) {
128        int props = getProperties(ch);
129        return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
130    }
131
132    int toLowerCase(int ch) {
133        int mapChar = ch;
134        int val = getProperties(ch);
135
136        if ((val & $$maskLowerCase) != 0) {
137          if ((val & $$maskCaseOffset) == $$maskCaseOffset) {
138            switch(ch) {
139              // map the offset overflow chars
140            case 0x0130 : mapChar = 0x0069; break;
141            case 0x2126 : mapChar = 0x03C9; break;
142            case 0x212A : mapChar = 0x006B; break;
143            case 0x212B : mapChar = 0x00E5; break;
144              // map the titlecase chars with both a 1:M uppercase map
145              // and a lowercase map
146            case 0x1F88 : mapChar = 0x1F80; break;
147            case 0x1F89 : mapChar = 0x1F81; break;
148            case 0x1F8A : mapChar = 0x1F82; break;
149            case 0x1F8B : mapChar = 0x1F83; break;
150            case 0x1F8C : mapChar = 0x1F84; break;
151            case 0x1F8D : mapChar = 0x1F85; break;
152            case 0x1F8E : mapChar = 0x1F86; break;
153            case 0x1F8F : mapChar = 0x1F87; break;
154            case 0x1F98 : mapChar = 0x1F90; break;
155            case 0x1F99 : mapChar = 0x1F91; break;
156            case 0x1F9A : mapChar = 0x1F92; break;
157            case 0x1F9B : mapChar = 0x1F93; break;
158            case 0x1F9C : mapChar = 0x1F94; break;
159            case 0x1F9D : mapChar = 0x1F95; break;
160            case 0x1F9E : mapChar = 0x1F96; break;
161            case 0x1F9F : mapChar = 0x1F97; break;
162            case 0x1FA8 : mapChar = 0x1FA0; break;
163            case 0x1FA9 : mapChar = 0x1FA1; break;
164            case 0x1FAA : mapChar = 0x1FA2; break;
165            case 0x1FAB : mapChar = 0x1FA3; break;
166            case 0x1FAC : mapChar = 0x1FA4; break;
167            case 0x1FAD : mapChar = 0x1FA5; break;
168            case 0x1FAE : mapChar = 0x1FA6; break;
169            case 0x1FAF : mapChar = 0x1FA7; break;
170            case 0x1FBC : mapChar = 0x1FB3; break;
171            case 0x1FCC : mapChar = 0x1FC3; break;
172            case 0x1FFC : mapChar = 0x1FF3; break;
173
174            case 0x023A : mapChar = 0x2C65; break;
175            case 0x023E : mapChar = 0x2C66; break;
176            case 0x10A0 : mapChar = 0x2D00; break;
177            case 0x10A1 : mapChar = 0x2D01; break;
178            case 0x10A2 : mapChar = 0x2D02; break;
179            case 0x10A3 : mapChar = 0x2D03; break;
180            case 0x10A4 : mapChar = 0x2D04; break;
181            case 0x10A5 : mapChar = 0x2D05; break;
182            case 0x10A6 : mapChar = 0x2D06; break;
183            case 0x10A7 : mapChar = 0x2D07; break;
184            case 0x10A8 : mapChar = 0x2D08; break;
185            case 0x10A9 : mapChar = 0x2D09; break;
186            case 0x10AA : mapChar = 0x2D0A; break;
187            case 0x10AB : mapChar = 0x2D0B; break;
188            case 0x10AC : mapChar = 0x2D0C; break;
189            case 0x10AD : mapChar = 0x2D0D; break;
190            case 0x10AE : mapChar = 0x2D0E; break;
191            case 0x10AF : mapChar = 0x2D0F; break;
192            case 0x10B0 : mapChar = 0x2D10; break;
193            case 0x10B1 : mapChar = 0x2D11; break;
194            case 0x10B2 : mapChar = 0x2D12; break;
195            case 0x10B3 : mapChar = 0x2D13; break;
196            case 0x10B4 : mapChar = 0x2D14; break;
197            case 0x10B5 : mapChar = 0x2D15; break;
198            case 0x10B6 : mapChar = 0x2D16; break;
199            case 0x10B7 : mapChar = 0x2D17; break;
200            case 0x10B8 : mapChar = 0x2D18; break;
201            case 0x10B9 : mapChar = 0x2D19; break;
202            case 0x10BA : mapChar = 0x2D1A; break;
203            case 0x10BB : mapChar = 0x2D1B; break;
204            case 0x10BC : mapChar = 0x2D1C; break;
205            case 0x10BD : mapChar = 0x2D1D; break;
206            case 0x10BE : mapChar = 0x2D1E; break;
207            case 0x10BF : mapChar = 0x2D1F; break;
208            case 0x10C0 : mapChar = 0x2D20; break;
209            case 0x10C1 : mapChar = 0x2D21; break;
210            case 0x10C2 : mapChar = 0x2D22; break;
211            case 0x10C3 : mapChar = 0x2D23; break;
212            case 0x10C4 : mapChar = 0x2D24; break;
213            case 0x10C5 : mapChar = 0x2D25; break;
214            case 0x10C7 : mapChar = 0x2D27; break;
215            case 0x10CD : mapChar = 0x2D2D; break;
216            case 0x1E9E : mapChar = 0x00DF; break;
217            case 0x2C62 : mapChar = 0x026B; break;
218            case 0x2C63 : mapChar = 0x1D7D; break;
219            case 0x2C64 : mapChar = 0x027D; break;
220            case 0x2C6D : mapChar = 0x0251; break;
221            case 0x2C6E : mapChar = 0x0271; break;
222            case 0x2C6F : mapChar = 0x0250; break;
223            case 0x2C70 : mapChar = 0x0252; break;
224            case 0x2C7E : mapChar = 0x023F; break;
225            case 0x2C7F : mapChar = 0x0240; break;
226            case 0xA77D : mapChar = 0x1D79; break;
227            case 0xA78D : mapChar = 0x0265; break;
228            case 0xA7AA : mapChar = 0x0266; break;
229              // default mapChar is already set, so no
230              // need to redo it here.
231              // default       : mapChar = ch;
232            }
233          }
234          else {
235            int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
236            mapChar = ch + offset;
237          }
238        }
239        return mapChar;
240    }
241
242    int toUpperCase(int ch) {
243        int mapChar = ch;
244        int val = getProperties(ch);
245
246        if ((val & $$maskUpperCase) != 0) {
247          if ((val & $$maskCaseOffset) == $$maskCaseOffset) {
248            switch(ch) {
249              // map chars with overflow offsets
250            case 0x00B5 : mapChar = 0x039C; break;
251            case 0x017F : mapChar = 0x0053; break;
252            case 0x1FBE : mapChar = 0x0399; break;
253              // map char that have both a 1:1 and 1:M map
254            case 0x1F80 : mapChar = 0x1F88; break;
255            case 0x1F81 : mapChar = 0x1F89; break;
256            case 0x1F82 : mapChar = 0x1F8A; break;
257            case 0x1F83 : mapChar = 0x1F8B; break;
258            case 0x1F84 : mapChar = 0x1F8C; break;
259            case 0x1F85 : mapChar = 0x1F8D; break;
260            case 0x1F86 : mapChar = 0x1F8E; break;
261            case 0x1F87 : mapChar = 0x1F8F; break;
262            case 0x1F90 : mapChar = 0x1F98; break;
263            case 0x1F91 : mapChar = 0x1F99; break;
264            case 0x1F92 : mapChar = 0x1F9A; break;
265            case 0x1F93 : mapChar = 0x1F9B; break;
266            case 0x1F94 : mapChar = 0x1F9C; break;
267            case 0x1F95 : mapChar = 0x1F9D; break;
268            case 0x1F96 : mapChar = 0x1F9E; break;
269            case 0x1F97 : mapChar = 0x1F9F; break;
270            case 0x1FA0 : mapChar = 0x1FA8; break;
271            case 0x1FA1 : mapChar = 0x1FA9; break;
272            case 0x1FA2 : mapChar = 0x1FAA; break;
273            case 0x1FA3 : mapChar = 0x1FAB; break;
274            case 0x1FA4 : mapChar = 0x1FAC; break;
275            case 0x1FA5 : mapChar = 0x1FAD; break;
276            case 0x1FA6 : mapChar = 0x1FAE; break;
277            case 0x1FA7 : mapChar = 0x1FAF; break;
278            case 0x1FB3 : mapChar = 0x1FBC; break;
279            case 0x1FC3 : mapChar = 0x1FCC; break;
280            case 0x1FF3 : mapChar = 0x1FFC; break;
281
282            case 0x023F : mapChar = 0x2C7E; break;
283            case 0x0240 : mapChar = 0x2C7F; break;
284            case 0x0250 : mapChar = 0x2C6F; break;
285            case 0x0251 : mapChar = 0x2C6D; break;
286            case 0x0252 : mapChar = 0x2C70; break;
287            case 0x0265 : mapChar = 0xA78D; break;
288            case 0x0266 : mapChar = 0xA7AA; break;
289            case 0x026B : mapChar = 0x2C62; break;
290            case 0x0271 : mapChar = 0x2C6E; break;
291            case 0x027D : mapChar = 0x2C64; break;
292            case 0x1D79 : mapChar = 0xA77D; break;
293            case 0x1D7D : mapChar = 0x2C63; break;
294            case 0x2C65 : mapChar = 0x023A; break;
295            case 0x2C66 : mapChar = 0x023E; break;
296            case 0x2D00 : mapChar = 0x10A0; break;
297            case 0x2D01 : mapChar = 0x10A1; break;
298            case 0x2D02 : mapChar = 0x10A2; break;
299            case 0x2D03 : mapChar = 0x10A3; break;
300            case 0x2D04 : mapChar = 0x10A4; break;
301            case 0x2D05 : mapChar = 0x10A5; break;
302            case 0x2D06 : mapChar = 0x10A6; break;
303            case 0x2D07 : mapChar = 0x10A7; break;
304            case 0x2D08 : mapChar = 0x10A8; break;
305            case 0x2D09 : mapChar = 0x10A9; break;
306            case 0x2D0A : mapChar = 0x10AA; break;
307            case 0x2D0B : mapChar = 0x10AB; break;
308            case 0x2D0C : mapChar = 0x10AC; break;
309            case 0x2D0D : mapChar = 0x10AD; break;
310            case 0x2D0E : mapChar = 0x10AE; break;
311            case 0x2D0F : mapChar = 0x10AF; break;
312            case 0x2D10 : mapChar = 0x10B0; break;
313            case 0x2D11 : mapChar = 0x10B1; break;
314            case 0x2D12 : mapChar = 0x10B2; break;
315            case 0x2D13 : mapChar = 0x10B3; break;
316            case 0x2D14 : mapChar = 0x10B4; break;
317            case 0x2D15 : mapChar = 0x10B5; break;
318            case 0x2D16 : mapChar = 0x10B6; break;
319            case 0x2D17 : mapChar = 0x10B7; break;
320            case 0x2D18 : mapChar = 0x10B8; break;
321            case 0x2D19 : mapChar = 0x10B9; break;
322            case 0x2D1A : mapChar = 0x10BA; break;
323            case 0x2D1B : mapChar = 0x10BB; break;
324            case 0x2D1C : mapChar = 0x10BC; break;
325            case 0x2D1D : mapChar = 0x10BD; break;
326            case 0x2D1E : mapChar = 0x10BE; break;
327            case 0x2D1F : mapChar = 0x10BF; break;
328            case 0x2D20 : mapChar = 0x10C0; break;
329            case 0x2D21 : mapChar = 0x10C1; break;
330            case 0x2D22 : mapChar = 0x10C2; break;
331            case 0x2D23 : mapChar = 0x10C3; break;
332            case 0x2D24 : mapChar = 0x10C4; break;
333            case 0x2D25 : mapChar = 0x10C5; break;
334            case 0x2D27 : mapChar = 0x10C7; break;
335            case 0x2D2D : mapChar = 0x10CD; break;
336              // ch must have a 1:M case mapping, but we
337              // can't handle it here. Return ch.
338              // since mapChar is already set, no need
339              // to redo it here.
340              //default       : mapChar = ch;
341            }
342          }
343          else {
344            int offset = val  << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
345            mapChar =  ch - offset;
346          }
347        }
348        return mapChar;
349    }
350
351    int toTitleCase(int ch) {
352        int mapChar = ch;
353        int val = getProperties(ch);
354
355        if ((val & $$maskTitleCase) != 0) {
356            // There is a titlecase equivalent.  Perform further checks:
357            if ((val & $$maskUpperCase) == 0) {
358                // The character does not have an uppercase equivalent, so it must
359                // already be uppercase; so add 1 to get the titlecase form.
360                mapChar = ch + 1;
361            }
362            else if ((val & $$maskLowerCase) == 0) {
363                // The character does not have a lowercase equivalent, so it must
364                // already be lowercase; so subtract 1 to get the titlecase form.
365                mapChar = ch - 1;
366            }
367            // else {
368            // The character has both an uppercase equivalent and a lowercase
369            // equivalent, so it must itself be a titlecase form; return it.
370            // return ch;
371            //}
372        }
373        else if ((val & $$maskUpperCase) != 0) {
374            // This character has no titlecase equivalent but it does have an
375            // uppercase equivalent, so use that (subtract the signed case offset).
376            mapChar = toUpperCase(ch);
377        }
378        return mapChar;
379    }
380
381    int digit(int ch, int radix) {
382        int value = -1;
383        if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
384            int val = getProperties(ch);
385            int kind = val & $$maskType;
386            if (kind == Character.DECIMAL_DIGIT_NUMBER) {
387                value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
388            }
389            else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
390                // Java supradecimal digit
391                value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
392            }
393        }
394        return (value < radix) ? value : -1;
395    }
396
397    int getNumericValue(int ch) {
398        int val = getProperties(ch);
399        int retval = -1;
400
401        switch (val & $$maskNumericType) {
402        default: // cannot occur
403        case ($$valueNotNumeric):         // not numeric
404            retval = -1;
405            break;
406        case ($$valueDigit):              // simple numeric
407            retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
408            break;
409        case ($$valueStrangeNumeric)      :       // "strange" numeric
410            switch (ch) {
411                case 0x0BF1: retval = 100; break;         // TAMIL NUMBER ONE HUNDRED
412                case 0x0BF2: retval = 1000; break;        // TAMIL NUMBER ONE THOUSAND
413                case 0x1375: retval = 40; break;          // ETHIOPIC NUMBER FORTY
414                case 0x1376: retval = 50; break;          // ETHIOPIC NUMBER FIFTY
415                case 0x1377: retval = 60; break;          // ETHIOPIC NUMBER SIXTY
416                case 0x1378: retval = 70; break;          // ETHIOPIC NUMBER SEVENTY
417                case 0x1379: retval = 80; break;          // ETHIOPIC NUMBER EIGHTY
418                case 0x137A: retval = 90; break;          // ETHIOPIC NUMBER NINETY
419                case 0x137B: retval = 100; break;         // ETHIOPIC NUMBER HUNDRED
420                case 0x137C: retval = 10000; break;       // ETHIOPIC NUMBER TEN THOUSAND
421                case 0x215F: retval = 1; break;           // FRACTION NUMERATOR ONE
422                case 0x216C: retval = 50; break;          // ROMAN NUMERAL FIFTY
423                case 0x216D: retval = 100; break;         // ROMAN NUMERAL ONE HUNDRED
424                case 0x216E: retval = 500; break;         // ROMAN NUMERAL FIVE HUNDRED
425                case 0x216F: retval = 1000; break;        // ROMAN NUMERAL ONE THOUSAND
426                case 0x217C: retval = 50; break;          // SMALL ROMAN NUMERAL FIFTY
427                case 0x217D: retval = 100; break;         // SMALL ROMAN NUMERAL ONE HUNDRED
428                case 0x217E: retval = 500; break;         // SMALL ROMAN NUMERAL FIVE HUNDRED
429                case 0x217F: retval = 1000; break;        // SMALL ROMAN NUMERAL ONE THOUSAND
430                case 0x2180: retval = 1000; break;        // ROMAN NUMERAL ONE THOUSAND C D
431                case 0x2181: retval = 5000; break;        // ROMAN NUMERAL FIVE THOUSAND
432                case 0x2182: retval = 10000; break;       // ROMAN NUMERAL TEN THOUSAND
433
434                case 0x324B: retval = 40; break;
435                case 0x324C: retval = 50; break;
436                case 0x324D: retval = 60; break;
437                case 0x324E: retval = 70; break;
438                case 0x324F: retval = 80; break;
439                case 0x325C: retval = 32; break;
440
441                case 0x325D: retval = 33; break;          // CIRCLED NUMBER THIRTY THREE
442                case 0x325E: retval = 34; break;          // CIRCLED NUMBER THIRTY FOUR
443                case 0x325F: retval = 35; break;          // CIRCLED NUMBER THIRTY FIVE
444                case 0x32B1: retval = 36; break;          // CIRCLED NUMBER THIRTY SIX
445                case 0x32B2: retval = 37; break;          // CIRCLED NUMBER THIRTY SEVEN
446                case 0x32B3: retval = 38; break;          // CIRCLED NUMBER THIRTY EIGHT
447                case 0x32B4: retval = 39; break;          // CIRCLED NUMBER THIRTY NINE
448                case 0x32B5: retval = 40; break;          // CIRCLED NUMBER FORTY
449                case 0x32B6: retval = 41; break;          // CIRCLED NUMBER FORTY ONE
450                case 0x32B7: retval = 42; break;          // CIRCLED NUMBER FORTY TWO
451                case 0x32B8: retval = 43; break;          // CIRCLED NUMBER FORTY THREE
452                case 0x32B9: retval = 44; break;          // CIRCLED NUMBER FORTY FOUR
453                case 0x32BA: retval = 45; break;          // CIRCLED NUMBER FORTY FIVE
454                case 0x32BB: retval = 46; break;          // CIRCLED NUMBER FORTY SIX
455                case 0x32BC: retval = 47; break;          // CIRCLED NUMBER FORTY SEVEN
456                case 0x32BD: retval = 48; break;          // CIRCLED NUMBER FORTY EIGHT
457                case 0x32BE: retval = 49; break;          // CIRCLED NUMBER FORTY NINE
458                case 0x32BF: retval = 50; break;          // CIRCLED NUMBER FIFTY
459
460                case 0x0D71: retval = 100; break;         // MALAYALAM NUMBER ONE HUNDRED
461                case 0x0D72: retval = 1000; break;        // MALAYALAM NUMBER ONE THOUSAND
462                case 0x2186: retval = 50; break;          // ROMAN NUMERAL FIFTY EARLY FORM
463                case 0x2187: retval = 50000; break;       // ROMAN NUMERAL FIFTY THOUSAND
464                case 0x2188: retval = 100000; break;      // ROMAN NUMERAL ONE HUNDRED THOUSAND
465
466                default:       retval = -2; break;
467            }
468            break;
469        case ($$valueJavaSupradecimal):           // Java supradecimal
470            retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
471            break;
472        }
473        return retval;
474    }
475
476    boolean isWhitespace(int ch) {
477        int props = getProperties(ch);
478        return ((props & $$maskIdentifierInfo) == $$valueJavaWhitespace);
479    }
480
481    byte getDirectionality(int ch) {
482        int val = getProperties(ch);
483        byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
484        if (directionality == 0xF ) {
485            switch(ch) {
486                case 0x202A :
487                    // This is the only char with LRE
488                    directionality = Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING;
489                    break;
490                case 0x202B :
491                    // This is the only char with RLE
492                    directionality = Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING;
493                    break;
494                case 0x202C :
495                    // This is the only char with PDF
496                    directionality = Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT;
497                    break;
498                case 0x202D :
499                    // This is the only char with LRO
500                    directionality = Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE;
501                    break;
502                case 0x202E :
503                    // This is the only char with RLO
504                    directionality = Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE;
505                    break;
506                default :
507                    directionality = Character.DIRECTIONALITY_UNDEFINED;
508                    break;
509            }
510        }
511        return directionality;
512    }
513
514    boolean isMirrored(int ch) {
515        int props = getProperties(ch);
516        return ((props & $$maskMirrored) != 0);
517    }
518
519    int toUpperCaseEx(int ch) {
520        int mapChar = ch;
521        int val = getProperties(ch);
522
523        if ((val & $$maskUpperCase) != 0) {
524            if ((val & $$maskCaseOffset) != $$maskCaseOffset) {
525                int offset = val  << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
526                mapChar =  ch - offset;
527            }
528            else {
529                switch(ch) {
530                    // map overflow characters
531                    case 0x00B5 : mapChar = 0x039C; break;
532                    case 0x017F : mapChar = 0x0053; break;
533                    case 0x1FBE : mapChar = 0x0399; break;
534
535                    case 0x023F : mapChar = 0x2C7E; break;
536                    case 0x0240 : mapChar = 0x2C7F; break;
537                    case 0x0250 : mapChar = 0x2C6F; break;
538                    case 0x0251 : mapChar = 0x2C6D; break;
539                    case 0x0252 : mapChar = 0x2C70; break;
540                    case 0x0265 : mapChar = 0xA78D; break;
541                    case 0x0266 : mapChar = 0xA7AA; break;
542                    case 0x026B : mapChar = 0x2C62; break;
543                    case 0x0271 : mapChar = 0x2C6E; break;
544                    case 0x027D : mapChar = 0x2C64; break;
545                    case 0x1D79 : mapChar = 0xA77D; break;
546                    case 0x1D7D : mapChar = 0x2C63; break;
547                    case 0x2C65 : mapChar = 0x023A; break;
548                    case 0x2C66 : mapChar = 0x023E; break;
549                    case 0x2D00 : mapChar = 0x10A0; break;
550                    case 0x2D01 : mapChar = 0x10A1; break;
551                    case 0x2D02 : mapChar = 0x10A2; break;
552                    case 0x2D03 : mapChar = 0x10A3; break;
553                    case 0x2D04 : mapChar = 0x10A4; break;
554                    case 0x2D05 : mapChar = 0x10A5; break;
555                    case 0x2D06 : mapChar = 0x10A6; break;
556                    case 0x2D07 : mapChar = 0x10A7; break;
557                    case 0x2D08 : mapChar = 0x10A8; break;
558                    case 0x2D09 : mapChar = 0x10A9; break;
559                    case 0x2D0A : mapChar = 0x10AA; break;
560                    case 0x2D0B : mapChar = 0x10AB; break;
561                    case 0x2D0C : mapChar = 0x10AC; break;
562                    case 0x2D0D : mapChar = 0x10AD; break;
563                    case 0x2D0E : mapChar = 0x10AE; break;
564                    case 0x2D0F : mapChar = 0x10AF; break;
565                    case 0x2D10 : mapChar = 0x10B0; break;
566                    case 0x2D11 : mapChar = 0x10B1; break;
567                    case 0x2D12 : mapChar = 0x10B2; break;
568                    case 0x2D13 : mapChar = 0x10B3; break;
569                    case 0x2D14 : mapChar = 0x10B4; break;
570                    case 0x2D15 : mapChar = 0x10B5; break;
571                    case 0x2D16 : mapChar = 0x10B6; break;
572                    case 0x2D17 : mapChar = 0x10B7; break;
573                    case 0x2D18 : mapChar = 0x10B8; break;
574                    case 0x2D19 : mapChar = 0x10B9; break;
575                    case 0x2D1A : mapChar = 0x10BA; break;
576                    case 0x2D1B : mapChar = 0x10BB; break;
577                    case 0x2D1C : mapChar = 0x10BC; break;
578                    case 0x2D1D : mapChar = 0x10BD; break;
579                    case 0x2D1E : mapChar = 0x10BE; break;
580                    case 0x2D1F : mapChar = 0x10BF; break;
581                    case 0x2D20 : mapChar = 0x10C0; break;
582                    case 0x2D21 : mapChar = 0x10C1; break;
583                    case 0x2D22 : mapChar = 0x10C2; break;
584                    case 0x2D23 : mapChar = 0x10C3; break;
585                    case 0x2D24 : mapChar = 0x10C4; break;
586                    case 0x2D25 : mapChar = 0x10C5; break;
587                    case 0x2D27 : mapChar = 0x10C7; break;
588                    case 0x2D2D : mapChar = 0x10CD; break;
589                    default       : mapChar = Character.ERROR; break;
590                }
591            }
592        }
593        return mapChar;
594    }
595
596    char[] toUpperCaseCharArray(int ch) {
597        char[] upperMap = {(char)ch};
598        int location = findInCharMap(ch);
599        if (location != -1) {
600            upperMap = charMap[location][1];
601        }
602        return upperMap;
603    }
604
605
606    /**
607     * Finds the character in the uppercase mapping table.
608     *
609     * @param ch the <code>char</code> to search
610     * @return the index location ch in the table or -1 if not found
611     * @since 1.4
612     */
613     int findInCharMap(int ch) {
614        if (charMap == null || charMap.length == 0) {
615            return -1;
616        }
617        int top, bottom, current;
618        bottom = 0;
619        top = charMap.length;
620        current = top/2;
621        // invariant: top > current >= bottom && ch >= CharacterData.charMap[bottom][0]
622        while (top - bottom > 1) {
623            if (ch >= charMap[current][0][0]) {
624                bottom = current;
625            } else {
626                top = current;
627            }
628            current = (top + bottom) / 2;
629        }
630        if (ch == charMap[current][0][0]) return current;
631        else return -1;
632    }
633
634    static final CharacterData00 instance = new CharacterData00();
635    private CharacterData00() {};
636
637    $$Tables
638
639    static {
640        $$Initializers
641    }        
642}
643