// POSIX_Unicode.java revision 12745:f068a4ffddd2
/*
 * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
23
24import java.util.HashMap;
25import java.util.Locale;
26
/**
 * Static predicates that sort a Unicode code point into POSIX-style character
 * classes (alpha, lower, space, punct, ...), implemented on top of
 * {@link Character}.
 *
 * <p>The comment on each method names the Unicode property expression the
 * method mirrors. NOTE(review): the expressions look like the POSIX
 * compatibility properties of UTS #18 - confirm against the intended spec.
 *
 * <p>All methods take a code point as an {@code int}. None of them validates
 * the argument; a value outside the code point range is simply handed to
 * {@link Character} or compared against the literal ranges below.
 */
public final class POSIX_Unicode {

    /** Bit set, indexed by {@link Character#getType(int)}, of Zs, Zl and Zp. */
    private static final int SEPARATOR_TYPES =
        (1 << Character.SPACE_SEPARATOR) |
        (1 << Character.LINE_SEPARATOR) |
        (1 << Character.PARAGRAPH_SEPARATOR);

    /** Bit set, indexed by {@link Character#getType(int)}, of Pc, Pd, Ps, Pe, Po, Pi and Pf. */
    private static final int PUNCTUATION_TYPES =
        (1 << Character.CONNECTOR_PUNCTUATION) |
        (1 << Character.DASH_PUNCTUATION) |
        (1 << Character.START_PUNCTUATION) |
        (1 << Character.END_PUNCTUATION) |
        (1 << Character.OTHER_PUNCTUATION) |
        (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
        (1 << Character.FINAL_QUOTE_PUNCTUATION);

    /** Bit set, indexed by {@link Character#getType(int)}, of Mn, Me, Mc and Pc. */
    private static final int MARK_AND_CONNECTOR_TYPES =
        (1 << Character.NON_SPACING_MARK) |
        (1 << Character.ENCLOSING_MARK) |
        (1 << Character.COMBINING_SPACING_MARK) |
        (1 << Character.CONNECTOR_PUNCTUATION);

    /**
     * Tests whether the general category of {@code ch} is one of the
     * categories whose bit is set in {@code typeMask}.
     *
     * @param ch       the code point to classify
     * @param typeMask bit set in which bit {@code n} stands for the
     *                 {@link Character#getType(int)} value {@code n}
     * @return {@code true} if the bit for the category of {@code ch} is set
     */
    private static boolean hasTypeIn(int ch, int typeMask) {
        return ((typeMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * Alphabetic test; delegates to {@link Character#isAlphabetic(int)}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is alphabetic
     */
    public static boolean isAlpha(int ch) {
        return Character.isAlphabetic(ch);
    }

    /**
     * Lowercase test; delegates to {@link Character#isLowerCase(int)}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a lowercase character
     */
    public static boolean isLower(int ch) {
        return Character.isLowerCase(ch);
    }

    /**
     * Uppercase test; delegates to {@link Character#isUpperCase(int)}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is an uppercase character
     */
    public static boolean isUpper(int ch) {
        return Character.isUpperCase(ch);
    }

    /**
     * {@code \p{Whitespace}}: any space, line or paragraph separator, the
     * range U+0009..U+000D, or U+0085.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is whitespace in the above sense
     */
    public static boolean isSpace(int ch) {
        return hasTypeIn(ch, SEPARATOR_TYPES) ||
               (ch >= 0x9 && ch <= 0xd) ||
               (ch == 0x85);
    }

    /**
     * {@code \p{gc=Control}}.
     *
     * @param ch the code point to test
     * @return {@code true} if the general category of {@code ch} is
     *         {@link Character#CONTROL}
     */
    public static boolean isCntrl(int ch) {
        return Character.getType(ch) == Character.CONTROL;
    }

    /**
     * {@code \p{gc=Punctuation}}: connector, dash, start, end, other,
     * initial-quote or final-quote punctuation.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is in one of the punctuation categories
     */
    public static boolean isPunct(int ch) {
        return hasTypeIn(ch, PUNCTUATION_TYPES);
    }

    /**
     * {@code \p{gc=Decimal_Number}} or {@code \p{Hex_Digit}} (PropList.txt:
     * Hex_Digit).
     *
     * <p>The ASCII and fullwidth digit ranges are already accepted by the
     * {@link Character#isDigit(int)} term; they are kept so the literal
     * ranges spell out the whole Hex_Digit set.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a decimal digit or a hex digit
     */
    public static boolean isHexDigit(int ch) {
        return Character.isDigit(ch) ||
               (ch >= 0x0030 && ch <= 0x0039) ||   // 0-9
               (ch >= 0x0041 && ch <= 0x0046) ||   // A-F
               (ch >= 0x0061 && ch <= 0x0066) ||   // a-f
               (ch >= 0xFF10 && ch <= 0xFF19) ||   // fullwidth 0-9
               (ch >= 0xFF21 && ch <= 0xFF26) ||   // fullwidth A-F
               (ch >= 0xFF41 && ch <= 0xFF46);     // fullwidth a-f
    }

    /**
     * {@code \p{gc=Decimal_Number}}; delegates to
     * {@link Character#isDigit(int)}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a digit
     */
    public static boolean isDigit(int ch) {
        return Character.isDigit(ch);
    }

    /**
     * {@code \p{alpha}} or {@code \p{digit}}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is alphabetic or a digit
     */
    public static boolean isAlnum(int ch) {
        return Character.isAlphabetic(ch) || Character.isDigit(ch);
    }

    /**
     * {@code \p{Whitespace}} minus
     * {@code [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL} \p{gc=Line_Separator}
     * \p{gc=Paragraph_Separator}]}, i.e. whitespace that is not
     * U+000A, U+000B, U+000C, U+000D, U+0085, a line separator or a
     * paragraph separator.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is whitespace other than the
     *         excluded characters and categories listed above
     */
    public static boolean isBlank(int ch) {
        int type = Character.getType(ch);
        return isSpace(ch) &&
               ch != 0xa && ch != 0xb && ch != 0xc && ch != 0xd && ch != 0x85 &&
               type != Character.LINE_SEPARATOR &&
               type != Character.PARAGRAPH_SEPARATOR;
    }

    /**
     * {@code [^\p{space} \p{gc=Control} \p{gc=Surrogate} \p{gc=Unassigned}]}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is neither whitespace nor a
     *         control, surrogate or unassigned code point
     */
    public static boolean isGraph(int ch) {
        int type = Character.getType(ch);
        return !(isSpace(ch) ||
                 Character.CONTROL == type ||
                 Character.SURROGATE == type ||
                 Character.UNASSIGNED == type);
    }

    /**
     * {@code \p{graph}} or {@code \p{blank}}, minus {@code \p{cntrl}}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is graphic or blank and is not a
     *         control character
     */
    public static boolean isPrint(int ch) {
        return (isGraph(ch) || isBlank(ch)) && !isCntrl(ch);
    }

    /**
     * PropList.txt: Noncharacter_Code_Point. True when the low 16 bits of
     * {@code ch} are {@code FFFE} or {@code FFFF}, or when {@code ch} lies
     * in U+FDD0..U+FDEF.
     *
     * <p>The low-16-bit test is applied to any {@code int}, including values
     * that are not valid code points.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} matches the pattern above
     */
    public static boolean isNoncharacterCodePoint(int ch) {
        return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
    }

    /**
     * Join-control test: U+200C or U+200D.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is U+200C or U+200D
     */
    public static boolean isJoinControl(int ch) {
        return (ch == 0x200C || ch == 0x200D);
    }

    /**
     * {@code \p{alpha}}, {@code \p{gc=Mark}}, {@code \p{digit}},
     * {@code \p{gc=Connector_Punctuation}}, or a join control
     * (see {@link #isJoinControl(int)}).
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} falls in any of the sets above
     */
    public static boolean isWord(int ch) {
        return isAlpha(ch) ||
               hasTypeIn(ch, MARK_AND_CONNECTOR_TYPES) ||
               isDigit(ch) ||
               isJoinControl(ch);
    }
}
147