CharacterData02.java.template revision 9330:8b1f1c2a400f
1/*
2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
28/** The CharacterData class encapsulates the large tables found in
29    Java.lang.Character. */
30
31class CharacterData02 extends CharacterData {
32    /* The character properties are currently encoded into 32 bits in the following manner:
33        1 bit   mirrored property
34        4 bits  directionality property
35        9 bits  signed offset used for converting case
36        1 bit   if 1, adding the signed offset converts the character to lowercase
37        1 bit   if 1, subtracting the signed offset converts the character to uppercase
38        1 bit   if 1, this character has a titlecase equivalent (possibly itself)
39        3 bits  0  may not be part of an identifier
40                1  ignorable control; may continue a Unicode identifier or Java identifier
41                2  may continue a Java identifier but not a Unicode identifier (unused)
42                3  may continue a Unicode identifier or Java identifier
43                4  is a Java whitespace character
44                5  may start or continue a Java identifier;
45                   may continue but not start a Unicode identifier (underscores)
46                6  may start or continue a Java identifier but not a Unicode identifier ($)
47                7  may start or continue a Unicode identifier or Java identifier
48                Thus:
49                   5, 6, 7 may start a Java identifier
50                   1, 2, 3, 5, 6, 7 may continue a Java identifier
51                   7 may start a Unicode identifier
52                   1, 3, 5, 7 may continue a Unicode identifier
53                   1 is ignorable within an identifier
54                   4 is Java whitespace
55        2 bits  0  this character has no numeric property
56                1  adding the digit offset to the character code and then
57                   masking with 0x1F will produce the desired numeric value
58                2  this character has a "strange" numeric value
59                3  a Java supradecimal digit: adding the digit offset to the
60                   character code, then masking with 0x1F, then adding 10
61                   will produce the desired numeric value
62        5 bits  digit offset
63        5 bits  character type
64
65        The encoding of character properties is subject to change at any time.
66     */
67
68    int getProperties(int ch) {
69	char offset = (char)ch;
70        int props = $$Lookup(offset);
71        return props;
72    }
73
74    int getPropertiesEx(int ch) {
75        char offset = (char)ch;
76        int props = $$LookupEx(offset);
77        return props;
78    }
79
80    boolean isOtherLowercase(int ch) {
81        int props = getPropertiesEx(ch);
82        return (props & $$maskOtherLowercase) != 0;
83    }
84
85    boolean isOtherUppercase(int ch) {
86        int props = getPropertiesEx(ch);
87        return (props & $$maskOtherUppercase) != 0;
88    }
89
90    boolean isOtherAlphabetic(int ch) {
91        int props = getPropertiesEx(ch);
92        return (props & $$maskOtherAlphabetic) != 0;
93    }
94
95    boolean isIdeographic(int ch) {
96        int props = getPropertiesEx(ch);
97        return (props & $$maskIdeographic) != 0;
98    }
99
100    int getType(int ch) {
101        int props = getProperties(ch);
102        return (props & $$maskType);
103    }
104
105    boolean isJavaIdentifierStart(int ch) {
106        int props = getProperties(ch);
107        return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
108    }
109
110    boolean isJavaIdentifierPart(int ch) {
111        int props = getProperties(ch);
112        return ((props & $$nonzeroJavaPart) != 0);
113    }
114
115    boolean isUnicodeIdentifierStart(int ch) {
116        int props = getProperties(ch);
117        return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
118    }
119
120    boolean isUnicodeIdentifierPart(int ch) {
121        int props = getProperties(ch);
122        return ((props & $$maskUnicodePart) != 0);
123    }
124
125    boolean isIdentifierIgnorable(int ch) {
126        int props = getProperties(ch);
127        return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
128    }
129
130    int toLowerCase(int ch) {
131        int mapChar = ch;
132        int val = getProperties(ch);
133
134        if ((val & $$maskLowerCase) != 0) {
135            int offset = val << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
136            mapChar = ch + offset;
137        }
138        return mapChar;
139    }
140
141    int toUpperCase(int ch) {
142        int mapChar = ch;
143        int val = getProperties(ch);
144
145        if ((val & $$maskUpperCase) != 0) {
146            int offset = val  << $$shiftCaseOffsetSign >> ($$shiftCaseOffsetSign+$$shiftCaseOffset);
147            mapChar =  ch - offset;
148        }
149        return mapChar;
150    }
151
152    int toTitleCase(int ch) {
153        int mapChar = ch;
154        int val = getProperties(ch);
155
156        if ((val & $$maskTitleCase) != 0) {
157            // There is a titlecase equivalent.  Perform further checks:
158            if ((val & $$maskUpperCase) == 0) {
159                // The character does not have an uppercase equivalent, so it must
160                // already be uppercase; so add 1 to get the titlecase form.
161                mapChar = ch + 1;
162            }
163            else if ((val & $$maskLowerCase) == 0) {
164                // The character does not have a lowercase equivalent, so it must
165                // already be lowercase; so subtract 1 to get the titlecase form.
166                mapChar = ch - 1;
167            }
168            // else {
169            // The character has both an uppercase equivalent and a lowercase
170            // equivalent, so it must itself be a titlecase form; return it.
171            // return ch;
172            //}
173        }
174        else if ((val & $$maskUpperCase) != 0) {
175            // This character has no titlecase equivalent but it does have an
176            // uppercase equivalent, so use that (subtract the signed case offset).
177            mapChar = toUpperCase(ch);
178        }
179        return mapChar;
180    }
181
182    int digit(int ch, int radix) {
183        int value = -1;
184        if (radix >= Character.MIN_RADIX && radix <= Character.MAX_RADIX) {
185            int val = getProperties(ch);
186            int kind = val & $$maskType;
187            if (kind == Character.DECIMAL_DIGIT_NUMBER) {
188                value = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
189            }
190            else if ((val & $$maskNumericType) == $$valueJavaSupradecimal) {
191                // Java supradecimal digit
192                value = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
193            }
194        }
195        return (value < radix) ? value : -1;
196    }
197
198    int getNumericValue(int ch) {
199        int val = getProperties(ch);
200        int retval = -1;
201
202        switch (val & $$maskNumericType) {
203        default: // cannot occur
204        case ($$valueNotNumeric):         // not numeric
205            retval = -1;
206            break;
207        case ($$valueDigit):              // simple numeric
208            retval = ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit;
209            break;
210        case ($$valueStrangeNumeric)      :       // "strange" numeric
211            retval = -2;
212            break;
213        case ($$valueJavaSupradecimal):           // Java supradecimal
214            retval = (ch + ((val & $$maskDigitOffset) >> $$shiftDigitOffset) & $$maskDigit) + 10;
215            break;
216        }
217        return retval;
218    }
219
220    boolean isWhitespace(int ch) {
221        return (getProperties(ch) & $$maskIdentifierInfo) == $$valueJavaWhitespace;
222    }
223
224    byte getDirectionality(int ch) {
225        int val = getProperties(ch);
226        byte directionality = (byte)((val & $$maskBidi) >> $$shiftBidi);
227        if (directionality == 0xF ) {
228	        directionality = Character.DIRECTIONALITY_UNDEFINED;
229        }
230        return directionality;
231    }
232
233    boolean isMirrored(int ch) {
234        return (getProperties(ch) & $$maskMirrored) != 0;
235    }
236
237    static final CharacterData instance = new CharacterData02();
238    private CharacterData02() {};
239
240    $$Tables
241
242    static {
243        $$Initializers
244    }        
245}
246