# UCD.t revision 1.1
#
# Tests for Unicode::UCD: charinfo, charblock(s), charscript(s),
# charinrange, UnicodeVersion, compexcl, casefold and casespec.

BEGIN {
    # Skip the whole file on EBCDIC platforms, where ord("A") is 193.
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    chdir 't' if -d 't';
    @INC = '../lib';
    @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself
}

use strict;
use warnings;
use Test::More;
use Unicode::UCD qw(
    charinfo charblock charblocks charscript charscripts
    charinrange compexcl casefold casespec
);

# 8 charinfo() groups of 17 fields each (136) plus 26 individual tests.
BEGIN { plan tests => 162 };

# The charinfo() hash keys examined by this file, in the order they are
# tested.
my @CHARINFO_FIELDS = qw(
    code name category combining bidi decomposition decimal digit numeric
    mirrored unicode10 comment upper lower title block script
);

# check_charinfo($code, $label, field => expected, ...)
#
# Calls charinfo($code) once and runs one is() per field/expected pair, in
# the order given, naming each test "$label: $field".  An expected value of
# undef means the field must be undefined.  Returns nothing.
sub check_charinfo {
    my ($code, $label, @expected) = @_;

    # Report failures at the caller's line rather than inside this helper.
    local $Test::Builder::Level = $Test::Builder::Level + 1;

    my $charinfo = charinfo($code);

    # splice() hands back two elements per pass and none once the list is
    # exhausted, so the list assignment counts 2, 2, ..., 0 and the loop
    # still runs for pairs whose expected value is undef.
    while (my ($field, $want) = splice(@expected, 0, 2)) {
        is($charinfo->{$field}, $want, "$label: $field");
    }
    return;
}

check_charinfo(0x41, 'LATIN CAPITAL LETTER A',
    code          => '0041',
    name          => 'LATIN CAPITAL LETTER A',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '0061',
    title         => '',
    block         => 'Basic Latin',
    script        => 'Latin',
);

check_charinfo(0x100, 'LATIN CAPITAL LETTER A WITH MACRON',
    code          => '0100',
    name          => 'LATIN CAPITAL LETTER A WITH MACRON',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '0041 0304',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => 'LATIN CAPITAL LETTER A MACRON',
    comment       => '',
    upper         => '',
    lower         => '0101',
    title         => '',
    block         => 'Latin Extended-A',
    script        => 'Latin',
);

# 0x0590 is in the Hebrew block but unused: every field must be undefined.

check_charinfo(0x590, '0x0590 - unused Hebrew',
    map { ($_ => undef) } @CHARINFO_FIELDS);

# 0x05d0 is in the Hebrew block and used.

check_charinfo(0x5d0, '05D0 - used Hebrew',
    code          => '05D0',
    name          => 'HEBREW LETTER ALEF',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'R',
    decomposition => '',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hebrew',
    script        => 'Hebrew',
);

# An open syllable in Hangul.

check_charinfo(0xAC00, 'HANGUL SYLLABLE-AC00',
    code          => 'AC00',
    name          => 'HANGUL SYLLABLE-AC00',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'L',
    decomposition => undef,
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hangul Syllables',
    script        => 'Hangul',
);

# A closed syllable in Hangul.

check_charinfo(0xAE00, 'HANGUL SYLLABLE-AE00',
    code          => 'AE00',
    name          => 'HANGUL SYLLABLE-AE00',
    category      => 'Lo',
    combining     => '0',
    bidi          => 'L',
    decomposition => undef,
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Hangul Syllables',
    script        => 'Hangul',
);

check_charinfo(0x1D400, 'MATHEMATICAL BOLD CAPITAL A',
    code          => '1D400',
    name          => 'MATHEMATICAL BOLD CAPITAL A',
    category      => 'Lu',
    combining     => '0',
    bidi          => 'L',
    decomposition => '<font> 0041',
    decimal       => '',
    digit         => '',
    numeric       => '',
    mirrored      => 'N',
    unicode10     => '',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Mathematical Alphanumeric Symbols',
    script        => undef,
);

# 0x0590 is in the Hebrew block but unused.

is(charblock(0x590),  'Hebrew', '0x0590 - Hebrew unused charblock');
is(charscript(0x590), undef,    '0x0590 - Hebrew unused charscript');

check_charinfo(0xbe, 'VULGAR FRACTION THREE QUARTERS',
    code          => '00BE',
    name          => 'VULGAR FRACTION THREE QUARTERS',
    category      => 'No',
    combining     => '0',
    bidi          => 'ON',
    decomposition => '<fraction> 0033 2044 0034',
    decimal       => '',
    digit         => '',
    numeric       => '3/4',
    mirrored      => 'N',
    unicode10     => 'FRACTION THREE QUARTERS',
    comment       => '',
    upper         => '',
    lower         => '',
    title         => '',
    block         => 'Latin-1 Supplement',
    script        => undef,
);

# charblocks() and charscripts() are used here as hash refs keyed by name,
# each value being a list of ranges whose first element is the range start.

my $charblocks = charblocks();

ok(exists $charblocks->{Thai}, 'Thai charblock exists');
is($charblocks->{Thai}->[0]->[0], hex('0e00'),
   'first Thai charblock range starts at 0x0E00');
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

my $charscripts = charscripts();

ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}->[0]->[0], hex('0531'),
   'first Armenian charscript range starts at 0x0531');
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');

# The same code point given as bare hex, 0x-prefixed and U+-prefixed strings.

for my $code ("12ab", "0x12ab", "U+12ab") {
    is(charscript($code), 'Ethiopic', "Ethiopic charscript ($code)");
}

# Given a script name, charscript() is used here as a list of
# [start, end] ranges.

my $ranges = charscript('Ogham');
is($ranges->[0]->[0], hex('1681'), 'Ogham charscript');
is($ranges->[0]->[1], hex('169a'), 'Ogham charscript range end');

$ranges = charscript('Cherokee');
ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
ok( charinrange($ranges, "13a0"), '13a0 is in the Cherokee charscript');
ok( charinrange($ranges, "13f4"), '13f4 is in the Cherokee charscript');
ok(!charinrange($ranges, "13f5"), '13f5 is not in the Cherokee charscript');

is(Unicode::UCD::UnicodeVersion(), '3.2.0', 'UnicodeVersion');

ok(!compexcl(0x0100), 'compexcl');
ok( compexcl(0x0958), 'compexcl 0x0958');

my $casefold;

$casefold = casefold(0x41);

ok($casefold->{code}    eq '0041' &&
   $casefold->{status}  eq 'C'    &&
   $casefold->{mapping} eq '0061', 'casefold 0x41');

$casefold = casefold(0xdf);

ok($casefold->{code}    eq '00DF' &&
   $casefold->{status}  eq 'F'    &&
   $casefold->{mapping} eq '0073 0073', 'casefold 0xDF');

ok(!casefold(0x20), 'casefold 0x20 is false');

my $casespec;

ok(!casespec(0x41), 'casespec 0x41 is false');

$casespec = casespec(0xdf);

# The condition must be undefined here.  This used to be written as
# "eq undef", a string comparison that is also true for '' and warns;
# defined() states the intent directly.
ok($casespec->{code}  eq '00DF'      &&
   $casespec->{lower} eq '00DF'      &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined $casespec->{condition}, 'casespec 0xDF');

$casespec = casespec(0x307);

# The entry for 0x307 is looked up under the 'az' key.  The lower-case
# mapping was originally compared with eq '', which is also true for undef;
# both are still accepted, without an uninitialized-value warning.
my $az = $casespec->{az};

ok($az->{code} eq '0307' &&
   (!defined $az->{lower} || $az->{lower} eq '') &&
   $az->{title} eq '0307' &&
   $az->{upper} eq '0307' &&
   $az->{condition} eq 'az After_Soft_Dotted',
   'casespec 0x307');