1/*
 * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
 *		Axel Dörfler, axeld@pinc-software.de
7 *		Siarzhuk Zharski, zharik@gmx.li
8 *
9 */
10
11
12#include <UnicodeChar.h>
13
14#include <unicode/uchar.h>
15#include <unicode/utf8.h>
16
17
// Default constructor. The body is intentionally empty: nothing is
// initialized here.
BUnicodeChar::BUnicodeChar()
{
}
21
22
23// Returns the general category value for the code point.
24int8
25BUnicodeChar::Type(uint32 c)
26{
27	return u_charType(c);
28}
29
30
31// Determines whether the specified code point is a letter character.
32// True for general categories "L" (letters).
33bool
34BUnicodeChar::IsAlpha(uint32 c)
35{
36	return u_isalpha(c);
37}
38
39
40// Determines whether the specified code point is an alphanumeric character
41// (letter or digit).
42// True for characters with general categories
43// "L" (letters) and "Nd" (decimal digit numbers).
44bool
45BUnicodeChar::IsAlNum(uint32 c)
46{
47	return u_isalnum(c);
48}
49
50
51// Check if a code point has the Lowercase Unicode property (UCHAR_LOWERCASE).
52bool
53BUnicodeChar::IsLower(uint32 c)
54{
55	return u_isULowercase(c);
56}
57
58
59// Check if a code point has the Uppercase Unicode property (UCHAR_UPPERCASE).
60bool
61BUnicodeChar::IsUpper(uint32 c)
62{
63	return u_isUUppercase(c);
64}
65
66
67// Determines whether the specified code point is a titlecase letter.
68// True for general category "Lt" (titlecase letter).
69bool
70BUnicodeChar::IsTitle(uint32 c)
71{
72	return u_istitle(c);
73}
74
75
76// Determines whether the specified code point is a digit character.
77// True for characters with general category "Nd" (decimal digit numbers).
78// Beginning with Unicode 4, this is the same as
79// testing for the Numeric_Type of Decimal.
80bool
81BUnicodeChar::IsDigit(uint32 c)
82{
83	return u_isdigit(c);
84}
85
86
87// Determines whether the specified code point is a hexadecimal digit.
88// This is equivalent to u_digit(c, 16)>=0.
89// True for characters with general category "Nd" (decimal digit numbers)
90// as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
91// (That is, for letters with code points
92// 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
93bool
94BUnicodeChar::IsHexDigit(uint32 c)
95{
96	return u_isxdigit(c);
97}
98
99
100// Determines whether the specified code point is "defined",
101// which usually means that it is assigned a character.
102// True for general categories other than "Cn" (other, not assigned),
103// i.e., true for all code points mentioned in UnicodeData.txt.
104bool
105BUnicodeChar::IsDefined(uint32 c)
106{
107	return u_isdefined(c);
108}
109
110
111// Determines whether the specified code point is a base character.
112// True for general categories "L" (letters), "N" (numbers),
113// "Mc" (spacing combining marks), and "Me" (enclosing marks).
114bool
115BUnicodeChar::IsBase(uint32 c)
116{
117	return u_isbase(c);
118}
119
120
121// Determines whether the specified code point is a control character
122// (as defined by this function).
123// A control character is one of the following:
124// - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
125// - U_CONTROL_CHAR (Cc)
126// - U_FORMAT_CHAR (Cf)
127// - U_LINE_SEPARATOR (Zl)
128// - U_PARAGRAPH_SEPARATOR (Zp)
129bool
130BUnicodeChar::IsControl(uint32 c)
131{
132	return u_iscntrl(c);
133}
134
135
136// Determines whether the specified code point is a punctuation character.
137// True for characters with general categories "P" (punctuation).
138bool
139BUnicodeChar::IsPunctuation(uint32 c)
140{
141	return u_ispunct(c);
142}
143
144
145// Determine if the specified code point is a space character according to Java.
146// True for characters with general categories "Z" (separators),
147// which does not include control codes (e.g., TAB or Line Feed).
148bool
149BUnicodeChar::IsSpace(uint32 c)
150{
151	return u_isJavaSpaceChar(c);
152}
153
154
155// Determines if the specified code point is a whitespace character
156// A character is considered to be a whitespace character if and only
157// if it satisfies one of the following criteria:
158// - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"),
159//		but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space
160//		or U+202F Narrow NBSP).
161// - It is U+0009 HORIZONTAL TABULATION.
162// - It is U+000A LINE FEED.
163// - It is U+000B VERTICAL TABULATION.
164// - It is U+000C FORM FEED.
165// - It is U+000D CARRIAGE RETURN.
166// - It is U+001C FILE SEPARATOR.
167// - It is U+001D GROUP SEPARATOR.
168// - It is U+001E RECORD SEPARATOR.
169// - It is U+001F UNIT SEPARATOR.
170bool
171BUnicodeChar::IsWhitespace(uint32 c)
172{
173	return u_isWhitespace(c);
174}
175
176
177// Determines whether the specified code point is a printable character.
178// True for general categories other than "C" (controls).
179bool
180BUnicodeChar::IsPrintable(uint32 c)
181{
182	return u_isprint(c);
183}
184
185
186//	#pragma mark -
187
188uint32
189BUnicodeChar::ToLower(uint32 c)
190{
191	return u_tolower(c);
192}
193
194
195uint32
196BUnicodeChar::ToUpper(uint32 c)
197{
198	return u_toupper(c);
199}
200
201
202uint32
203BUnicodeChar::ToTitle(uint32 c)
204{
205	return u_totitle(c);
206}
207
208
209int32
210BUnicodeChar::DigitValue(uint32 c)
211{
212	return u_digit(c, 10);
213}
214
215
216unicode_east_asian_width
217BUnicodeChar::EastAsianWidth(uint32 c)
218{
219	return (unicode_east_asian_width)u_getIntPropertyValue(c,
220			UCHAR_EAST_ASIAN_WIDTH);
221}
222
223
224void
225BUnicodeChar::ToUTF8(uint32 c, char** out)
226{
227	int i = 0;
228	U8_APPEND_UNSAFE(*out, i, c);
229	*out += i;
230}
231
232
233uint32
234BUnicodeChar::FromUTF8(const char** in)
235{
236	int i = 0;
237	uint32 c = 0;
238	U8_NEXT_UNSAFE(*in, i, c);
239	*in += i;
240
241	return c;
242}
243
244
245size_t
246BUnicodeChar::UTF8StringLength(const char* string)
247{
248	size_t len = 0;
249	while (*string) {
250		FromUTF8(&string);
251		len++;
252	}
253	return len;
254}
255
256
257size_t
258BUnicodeChar::UTF8StringLength(const char* string, size_t maxLength)
259{
260	size_t len = 0;
261	while (len < maxLength && *string) {
262		FromUTF8(&string);
263		len++;
264	}
265	return len;
266}
267