UCD.t revision 1.1
# Tests for Unicode::UCD: charinfo, charblock(s), charscript(s), charinrange,
# UnicodeVersion, compexcl, casefold and casespec.
#
# The expected values below are those of the Unicode database version that
# the UnicodeVersion test further down asserts ('3.2.0').

BEGIN {
    # The expected code points below are not valid on EBCDIC platforms
    # (where ord("A") is 193), so skip the whole file there.
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }
    chdir 't' if -d 't';
    @INC = '../lib';
    @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself
}

use strict;
use warnings;
use Unicode::UCD;
use Test::More;

BEGIN { plan tests => 162 };

use Unicode::UCD 'charinfo';

my $charinfo;

# A plain ASCII letter: 17 fields are checked for every charinfo() result.

$charinfo = charinfo(0x41);

is($charinfo->{code},           '0041', 'LATIN CAPITAL LETTER A');
is($charinfo->{name},           'LATIN CAPITAL LETTER A');
is($charinfo->{category},       'Lu');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  '');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '0061');
is($charinfo->{title},          '');
is($charinfo->{block},          'Basic Latin');
is($charinfo->{script},         'Latin');

# A letter with a canonical decomposition and a Unicode 1.0 name.

$charinfo = charinfo(0x100);

is($charinfo->{code},           '0100', 'LATIN CAPITAL LETTER A WITH MACRON');
is($charinfo->{name},           'LATIN CAPITAL LETTER A WITH MACRON');
is($charinfo->{category},       'Lu');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  '0041 0304');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      'LATIN CAPITAL LETTER A MACRON');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '0101');
is($charinfo->{title},          '');
is($charinfo->{block},          'Latin Extended-A');
is($charinfo->{script},         'Latin');

# 0x0590 is in the Hebrew block but unused.

$charinfo = charinfo(0x590);

is($charinfo->{code},           undef, '0x0590 - unused Hebrew');
is($charinfo->{name},           undef);
is($charinfo->{category},       undef);
is($charinfo->{combining},      undef);
is($charinfo->{bidi},           undef);
is($charinfo->{decomposition},  undef);
is($charinfo->{decimal},        undef);
is($charinfo->{digit},          undef);
is($charinfo->{numeric},        undef);
is($charinfo->{mirrored},       undef);
is($charinfo->{unicode10},      undef);
is($charinfo->{comment},        undef);
is($charinfo->{upper},          undef);
is($charinfo->{lower},          undef);
is($charinfo->{title},          undef);
is($charinfo->{block},          undef);
is($charinfo->{script},         undef);

# 0x05d0 is in the Hebrew block and used.

$charinfo = charinfo(0x5d0);

is($charinfo->{code},           '05D0', '05D0 - used Hebrew');
is($charinfo->{name},           'HEBREW LETTER ALEF');
is($charinfo->{category},       'Lo');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'R');
is($charinfo->{decomposition},  '');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Hebrew');
is($charinfo->{script},         'Hebrew');

# An open syllable in Hangul.

$charinfo = charinfo(0xAC00);

is($charinfo->{code},           'AC00', 'HANGUL SYLLABLE-AC00');
is($charinfo->{name},           'HANGUL SYLLABLE-AC00');
is($charinfo->{category},       'Lo');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  undef);
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Hangul Syllables');
is($charinfo->{script},         'Hangul');

# A closed syllable in Hangul.

$charinfo = charinfo(0xAE00);

is($charinfo->{code},           'AE00', 'HANGUL SYLLABLE-AE00');
is($charinfo->{name},           'HANGUL SYLLABLE-AE00');
is($charinfo->{category},       'Lo');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  undef);
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Hangul Syllables');
is($charinfo->{script},         'Hangul');

# A code point above 0xFFFF (five hex digits in {code}).

$charinfo = charinfo(0x1D400);

is($charinfo->{code},           '1D400', 'MATHEMATICAL BOLD CAPITAL A');
is($charinfo->{name},           'MATHEMATICAL BOLD CAPITAL A');
is($charinfo->{category},       'Lu');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'L');
is($charinfo->{decomposition},  '<font> 0041');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      '');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Mathematical Alphanumeric Symbols');
is($charinfo->{script},         undef);

use Unicode::UCD qw(charblock charscript);

# 0x0590 is in the Hebrew block but unused.

is(charblock(0x590),            'Hebrew', '0x0590 - Hebrew unused charblock');
is(charscript(0x590),           undef,    '0x0590 - Hebrew unused charscript');

# A character with a non-empty numeric value and a compatibility decomposition.

$charinfo = charinfo(0xbe);

is($charinfo->{code},           '00BE', 'VULGAR FRACTION THREE QUARTERS');
is($charinfo->{name},           'VULGAR FRACTION THREE QUARTERS');
is($charinfo->{category},       'No');
is($charinfo->{combining},      '0');
is($charinfo->{bidi},           'ON');
is($charinfo->{decomposition},  '<fraction> 0033 2044 0034');
is($charinfo->{decimal},        '');
is($charinfo->{digit},          '');
is($charinfo->{numeric},        '3/4');
is($charinfo->{mirrored},       'N');
is($charinfo->{unicode10},      'FRACTION THREE QUARTERS');
is($charinfo->{comment},        '');
is($charinfo->{upper},          '');
is($charinfo->{lower},          '');
is($charinfo->{title},          '');
is($charinfo->{block},          'Latin-1 Supplement');
is($charinfo->{script},         undef);

use Unicode::UCD qw(charblocks charscripts);

# charblocks()/charscripts() return a hash ref keyed by name; each value is
# indexed here as a list of ranges whose first element is the range start.

my $charblocks = charblocks();

ok(exists $charblocks->{Thai}, 'Thai charblock exists');
is($charblocks->{Thai}->[0]->[0], hex('0e00'));
ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');

my $charscripts = charscripts();

ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
is($charscripts->{Armenian}->[0]->[0], hex('0531'));
ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');

# The same code point given as a bare hex string, with "0x", and with "U+".

my $charscript;

$charscript = charscript("12ab");
is($charscript, 'Ethiopic', 'Ethiopic charscript');

$charscript = charscript("0x12ab");
is($charscript, 'Ethiopic');

$charscript = charscript("U+12ab");
is($charscript, 'Ethiopic');

# Given a script name instead of a code point, charscript() returns ranges.

my $ranges;

$ranges = charscript('Ogham');
is($ranges->[0]->[0], hex('1681'), 'Ogham charscript');
is($ranges->[0]->[1], hex('169a'));

use Unicode::UCD qw(charinrange);

# Probe just outside and just inside both ends of the Cherokee range.

$ranges = charscript('Cherokee');
ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
ok( charinrange($ranges, "13a0"));
ok( charinrange($ranges, "13f4"));
ok(!charinrange($ranges, "13f5"));

is(Unicode::UCD::UnicodeVersion, '3.2.0', 'UnicodeVersion');

use Unicode::UCD qw(compexcl);

ok(!compexcl(0x0100), 'compexcl');
ok( compexcl(0x0958));

use Unicode::UCD qw(casefold);

my $casefold;

$casefold = casefold(0x41);

ok($casefold->{code}    eq '0041' &&
   $casefold->{status}  eq 'C'    &&
   $casefold->{mapping} eq '0061', 'casefold 0x41');

$casefold = casefold(0xdf);

ok($casefold->{code}    eq '00DF' &&
   $casefold->{status}  eq 'F'    &&
   $casefold->{mapping} eq '0073 0073', 'casefold 0xDF');

ok(!casefold(0x20));

use Unicode::UCD qw(casespec);

my $casespec;

ok(!casespec(0x41));

$casespec = casespec(0xdf);

# The condition must be genuinely undefined here.  Testing it with
# "eq undef" would stringify undef to '' and therefore also accept an
# empty-string condition (and warn about an uninitialized value).
ok($casespec->{code}  eq '00DF'      &&
   $casespec->{lower} eq '00DF'      &&
   $casespec->{title} eq '0053 0073' &&
   $casespec->{upper} eq '0053 0053' &&
   !defined($casespec->{condition}), 'casespec 0xDF');

$casespec = casespec(0x307);

# For 0x307 the result is indexed by the 'az' key before the usual fields.
ok($casespec->{az}->{code}      eq '0307' &&
   $casespec->{az}->{lower}     eq ''     &&
   $casespec->{az}->{title}     eq '0307' &&
   $casespec->{az}->{upper}     eq '0307' &&
   $casespec->{az}->{condition} eq 'az After_Soft_Dotted',
   'casespec 0x307');