#! /usr/local/bin/perl -w
# $Id: generate_nameprep_data.pl,v 1.1 2003/06/04 00:27:54 marka Exp $
#
# Copyright (c) 2001 Japan Network Information Center. All rights reserved.
#
# By using this file, you agree to the terms and conditions set forth bellow.
#
# LICENSE TERMS AND CONDITIONS
#
# The following License Terms and Conditions apply, unless a different
# license is obtained from Japan Network Information Center ("JPNIC"),
# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
# Chiyoda-ku, Tokyo 101-0047, Japan.
#
# 1. Use, Modification and Redistribution (including distribution of any
# modified or derived work) in source and/or binary forms is permitted
# under this License Terms and Conditions.
#
# 2. Redistribution of source code must retain the copyright notices as they
# appear in each source code file, this License Terms and Conditions.
#
# 3. Redistribution in binary form must reproduce the Copyright Notice,
# this License Terms and Conditions, in the documentation and/or other
# materials provided with the distribution. For the purposes of binary
# distribution the "Copyright Notice" refers to the following language:
# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
#
# 4. The name of JPNIC may not be used to endorse or promote products
# derived from this Software without specific prior written approval of
# JPNIC.
#
# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
#

#
# Reads, for every version named on the command line, the data files
#     <dir>/nameprep.<version>.{map,prohibited,unassigned,bidi}
# and writes C source text (#define lines and static tables) for them to
# standard output.
#
# Usage: generate_nameprep_data.pl [-dir dir] version..
#

use v5.6.0;		# for pack('U')
use bytes;
use strict;

use lib qw(.);

use SparseMap;
use Getopt::Long;

# The RCS keyword is rewritten to "$-Id: ...-$" so that the copy embedded in
# the generated file is not expanded again by the revision control system.
(my $myid = '$Id: generate_nameprep_data.pl,v 1.1 2003/06/04 00:27:54 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/;

# Bit widths handed to SparseMap (BITS => ...) and echoed into the output as
# <NAME>_BITS_<n> macros.  Each triple sums to 21 bits, enough to index the
# 0x110000 code points given as MAX below.
my @map_bits = (9, 7, 5);
my @proh_bits = (7, 7, 7);
my @unas_bits = (7, 7, 7);
my @bidi_bits = (9, 7, 5);

# Bidi categories accepted by "% BIDI_TYPE <name>" lines.  The index of a
# name in this list is the value stored in the bidi map; index 0 ('OTHERS')
# is the type in effect before any BIDI_TYPE line is seen.
my @bidi_types = ('OTHERS', 'R_AL', 'L');

my $dir = '.';
my @versions = ();

# usage() dies by itself, so no extra die is needed here.
GetOptions('dir=s', \$dir) or usage();
@versions = @ARGV;

print_header();

bits_definition("MAP", @map_bits);
bits_definition("PROH", @proh_bits);
bits_definition("UNAS", @unas_bits);
bits_definition("BIDI", @bidi_bits);

generate_data($_) foreach @versions;

#
# Abort with a usage message.
#
sub usage {
    die "Usage: $0 [-dir dir] version..\n";
}

#
# Return true if $name is one of the versions given on the command line.
#
# This must be a parenthesised/standalone grep: the former inline form
# "grep {...} @versions > 0" parsed as "grep {...} (@versions > 0)" and
# therefore compared $name against the string "1".
#
sub is_requested_version {
    my ($name) = @_;
    return scalar(grep { $_ eq $name } @versions);
}

#
# Emit all four tables (map, prohibited, unassigned, bidi) for one version.
#
sub generate_data {
    my ($version) = @_;

    generate_mapdata($version, "$dir/nameprep.$version.map");
    generate_prohibiteddata($version, "$dir/nameprep.$version.prohibited");
    generate_unassigneddata($version, "$dir/nameprep.$version.unassigned");
    generate_bididata($version, "$dir/nameprep.$version.bidi");
}

#
# Generate mapping data.
#
# If the first line of $file is "% SAME-AS <other>" and <other> is also
# being generated, only alias macros pointing at <other>'s tables are
# emitted.  Otherwise every non-comment, non-blank line is registered and a
# full table is printed.
#
sub generate_mapdata {
    my ($version, $file) = @_;

    my $map = SparseMap::Int->new(BITS => [@map_bits],
                                  MAX => 0x110000,
                                  MAPALL => 1,
                                  DEFAULT => 0);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    # Offset 0 is occupied by a dummy byte so that no real mapping entry
    # ever gets offset 0, which is also the map's DEFAULT value.
    my $mapbuf = "\0";
    my %maphash = ();
    while (my $line = <$fh>) {
        if ($. == 1 and $line =~ /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            if (is_requested_version($same_as)) {
                generate_map_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;
        register_map($map, \$mapbuf, \%maphash, $line);
    }
    close $fh;
    generate_map($version, $map, \$mapbuf);
}

#
# Generate prohibited character data.
#
# Handles "% SAME-AS" on the first line like generate_mapdata does.
#
sub generate_prohibiteddata {
    my ($version, $file) = @_;

    my $proh = SparseMap::Bit->new(BITS => [@proh_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";
    while (my $line = <$fh>) {
        if ($. == 1 and $line =~ /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            if (is_requested_version($same_as)) {
                generate_prohibited_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;
        register_prohibited($proh, $line);
    }
    close $fh;
    generate_prohibited($version, $proh);
}

#
# Generate unassigned codepoint data.
#
# Handles "% SAME-AS" on the first line like generate_mapdata does.
#
sub generate_unassigneddata {
    my ($version, $file) = @_;

    my $unas = SparseMap::Bit->new(BITS => [@unas_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";
    while (my $line = <$fh>) {
        if ($. == 1 and $line =~ /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            if (is_requested_version($same_as)) {
                generate_unassigned_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;
        register_unassigned($unas, $line);
    }
    close $fh;
    generate_unassigned($version, $unas);
}

#
# Generate data of bidi "R" or "AL" characters.
#
# Besides "% SAME-AS" on the first line, the file may contain
# "% BIDI_TYPE <name>" lines; each one selects the type (an index into
# @bidi_types) assigned to the code points that follow it.
#
sub generate_bididata {
    my ($version, $file) = @_;

    my $bidi = SparseMap::Int->new(BITS => [@bidi_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    my $type = 0;
    while (my $line = <$fh>) {
        if ($. == 1 and $line =~ /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            if (is_requested_version($same_as)) {
                # Alias the bidi tables (this used to alias the
                # "unassigned" tables by mistake).
                generate_bidi_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        if ($line =~ /^%\s*BIDI_TYPE\s+(\S+)$/) {
            my $name = $1;
            my ($index) = grep { $bidi_types[$_] eq $name } 0 .. $#bidi_types;
            die "unrecognized line: $line" unless defined $index;
            $type = $index;
            next;
        }
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;
        register_bidi($bidi, $type, $line);
    }
    close $fh;

    generate_bidi($version, $bidi);
}

#
# Print the banner comment that starts the generated file.
#
sub print_header {
    print <<"END";
/* \$Id\$ */
/* $myid */
/*
 * Do not edit this file!
 * This file is generated from NAMEPREP specification.
 */

END
}

#
# Print "#define <name>_BITS_<i> <n>" for each bit width, followed by a
# blank line.
#
sub bits_definition {
    my ($name, @bits) = @_;

    foreach my $i (0 .. $#bits) {
        print "#define ${name}_BITS_$i\t$bits[$i]\n";
    }
    print "\n";
}

#
# Register one "FROM; TO TO ...;" mapping line.
#
#   $map     - SparseMap::Int object receiving FROM => offset
#   $bufref  - reference to the byte buffer holding all target sequences
#   $hashref - reference to a hash from packed target sequence to its
#              offset in the buffer, so identical targets are stored once
#   $line    - the input line; code points are hexadecimal
#
# Dies if the FROM column does not consist of exactly one code point.
#
sub register_map {
    my ($map, $bufref, $hashref, $line) = @_;

    my ($from, $to) = split /;/, $line;
    my @fcode = map {hex($_)} split ' ', $from;
    my @tcode = map {hex($_)} split ' ', $to;

    # Validate before touching the shared buffer.
    die "unrecognized line: $line" if @fcode != 1;

    # Targets are stored as little-endian 32-bit values with the trailing
    # NUL bytes removed, preceded by a one-byte length.
    my $ucs4 = pack('V*', @tcode);
    $ucs4 =~ s/\000+$//;

    my $offset;
    if (exists $hashref->{$ucs4}) {
        $offset = $hashref->{$ucs4};
    } else {
        $offset = length $$bufref;
        $$bufref .= pack('C', length($ucs4)) . $ucs4;
        $hashref->{$ucs4} = $offset;
    }

    $map->add($fcode[0], $offset);
}

#
# Print the mapping table for $version: whatever SparseMap's cprog()
# returns for the index, then the target-sequence buffer as a C byte array.
#
sub generate_map {
    my ($version, $map, $bufref) = @_;

    $map->fix();

    print $map->cprog(NAME => "nameprep_${version}_map");
    print "\nstatic const unsigned char nameprep_${version}_map_data[] = \{\n";
    print_uchararray($$bufref);
    print "};\n\n";
}

#
# Print macros aliasing $version's mapping tables to those of $refversion.
#
sub generate_map_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_map_imap nameprep_${refversion}_map_imap
#define nameprep_${version}_map_table nameprep_${refversion}_map_table
#define nameprep_${version}_map_data nameprep_${refversion}_map_data

END
}

#
# Print the bytes of the given string as decimal C array elements,
# twelve per tab-indented row.
#
sub print_uchararray {
    my @chars = unpack 'C*', $_[0];
    my $i = 0;
    foreach my $v (@chars) {
        if ($i % 12 == 0) {
            print "\n" if $i != 0;
            print "\t";
        }
        printf "%3d, ", $v;
        $i++;
    }
    print "\n";
}

#
# Register one line of the "prohibited" file in the bitmap $proh.
#
sub register_prohibited {
    my $proh = shift;
    register_bitmap($proh, @_);
}

#
# Register one line of the "unassigned" file in the bitmap $unas.
#
sub register_unassigned {
    my $unas = shift;
    register_bitmap($unas, @_);
}

#
# Register one line of the "bidi" file in $bidi with bidi type index $type.
#
sub register_bidi {
    my $bidi = shift;
    my $type = shift;
    register_intmap($bidi, $type, @_);
}

#
# Print the prohibited-character table for $version.
#
sub generate_prohibited {
    my ($version, $proh) = @_;
    generate_bitmap($proh, "nameprep_${version}_prohibited");
    print "\n";
}

#
# Print macros aliasing $version's prohibited tables to $refversion's.
#
sub generate_prohibited_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_prohibited_imap nameprep_${refversion}_prohibited_imap
#define nameprep_${version}_prohibited_bitmap nameprep_${refversion}_prohibited_bitmap

END
}

#
# Print the unassigned-codepoint table for $version.
#
sub generate_unassigned {
    my ($version, $unas) = @_;
    generate_bitmap($unas, "nameprep_${version}_unassigned");
    print "\n";
}

#
# Print macros aliasing $version's unassigned tables to $refversion's.
#
sub generate_unassigned_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_unassigned_imap nameprep_${refversion}_unassigned_imap
#define nameprep_${version}_unassigned_bitmap nameprep_${refversion}_unassigned_bitmap

END
}

#
# Print the bidi table for $version, followed by a C array listing the
# idn_biditype_* constant for each entry of @bidi_types, in index order.
#
sub generate_bidi {
    my ($version, $bidi) = @_;

    $bidi->fix();

    print $bidi->cprog(NAME => "nameprep_${version}_bidi");
    print "\n";
    print "static const unsigned char nameprep_${version}_bidi_data[] = \{\n";

    foreach my $type (@bidi_types) {
        printf "\tidn_biditype_%s, \n", lc($type);
    }
    print "};\n\n";
}

#
# Print macros aliasing $version's bidi tables to $refversion's.
#
# generate_bidi() also emits a "_bidi_data" array, so that symbol is
# aliased too (as generate_map_ref() does for "_map_data"); without it the
# aliased version would lack one of the three bidi symbols.
#
sub generate_bidi_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_bidi_imap nameprep_${refversion}_bidi_imap
#define nameprep_${version}_bidi_table nameprep_${refversion}_bidi_table
#define nameprep_${version}_bidi_data nameprep_${refversion}_bidi_data

END
}

#
# Add the code point "XXXX" or the inclusive range "XXXX-YYYY" (hex) found
# at the start of $line to the bitmap $map.  Dies on any other line.
#
sub register_bitmap {
    my ($map, $line) = @_;

    # Match the argument, not the caller's $_.
    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
        or die "unrecognized line: $line";
    my $start = hex($1);
    my $end = defined($2) ? hex($2) : undef;
    if (defined $end) {
        $map->add($start .. $end);
    } else {
        $map->add($start);
    }
}

#
# Map the code point "XXXX" or every code point of the inclusive range
# "XXXX-YYYY" (hex) found at the start of $line to $value in $map.
# Dies on any other line.
#
sub register_intmap {
    my ($map, $value, $line) = @_;

    # Match the argument, not the caller's $_.
    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
        or die "unrecognized line: $line";
    my $start = hex($1);
    my $end = defined($2) ? hex($2) : $start;
    foreach my $code ($start .. $end) {
        $map->add($code, $value);
    }
}

#
# Finalise the bitmap $map and print the output of its cprog() under $name.
#
sub generate_bitmap {
    my ($map, $name) = @_;

    $map->fix();
    #$map->stat();
    print $map->cprog(NAME => $name);
}