1#! /usr/local/bin/perl -w
2# $Id: generate_nameprep_data.pl,v 1.1 2003/06/04 00:27:54 marka Exp $
3#
4# Copyright (c) 2001 Japan Network Information Center.  All rights reserved.
5#
6# By using this file, you agree to the terms and conditions set forth bellow.
7#
8# 			LICENSE TERMS AND CONDITIONS
9#
10# The following License Terms and Conditions apply, unless a different
11# license is obtained from Japan Network Information Center ("JPNIC"),
12# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
13# Chiyoda-ku, Tokyo 101-0047, Japan.
14#
15# 1. Use, Modification and Redistribution (including distribution of any
16#    modified or derived work) in source and/or binary forms is permitted
17#    under this License Terms and Conditions.
18#
19# 2. Redistribution of source code must retain the copyright notices as they
20#    appear in each source code file, this License Terms and Conditions.
21#
22# 3. Redistribution in binary form must reproduce the Copyright Notice,
23#    this License Terms and Conditions, in the documentation and/or other
24#    materials provided with the distribution.  For the purposes of binary
25#    distribution the "Copyright Notice" refers to the following language:
26#    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
27#
28# 4. The name of JPNIC may not be used to endorse or promote products
29#    derived from this Software without specific prior written approval of
30#    JPNIC.
31#
32# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
33#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
35#    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
36#    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37#    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38#    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
39#    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40#    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
41#    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
42#    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
43#
44
45use v5.6.0;		# for pack('U')
46use bytes;
47
48use lib qw(.);
49
50use SparseMap;
51use Getopt::Long;
52
# Identification string for the generated header: the substitution turns the
# leading and trailing "$" of the RCS Id keyword string into "$-" and "-$".
(my $myid = '$Id: generate_nameprep_data.pl,v 1.1 2003/06/04 00:27:54 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/;

# Per-level bit widths for each table kind.  They are handed to SparseMap as
# BITS and are also printed as <KIND>_BITS_<n> macros by bits_definition().
my @map_bits  = (9, 7, 5);
my @proh_bits = (7, 7, 7);
my @unas_bits = (7, 7, 7);
my @bidi_bits = (9, 7, 5);

# Bidi categories; the index of a name is the value stored in the bidi map.
my @bidi_types = qw(OTHERS R_AL L);

# Directory holding the nameprep.<version>.* input files (-dir option).
my $dir = '.';

# Versions named on the command line (everything left after option parsing).
my @versions;

# usage() dies by itself, so no extra die is needed here.
GetOptions('dir=s' => \$dir) or usage();
@versions = @ARGV;

print_header();

bits_definition('MAP',  @map_bits);
bits_definition('PROH', @proh_bits);
bits_definition('UNAS', @unas_bits);
bits_definition('BIDI', @bidi_bits);

# The file readers reached from generate_data() assign to the global $_, so
# the loop uses a named variable instead of aliasing $_ to list elements.
foreach my $version (@versions) {
    generate_data($version);
}
76
# Abort with a one-line usage summary.  The trailing newline keeps Perl from
# appending "at FILE line N" to the message.
sub usage {
    my $summary = "Usage: $0 [-dir dir] version..\n";
    die $summary;
}
80
# Emit all four tables (map, prohibited, unassigned, bidi) for one nameprep
# version.  Each input file is looked up as "$dir/nameprep.<version>.<kind>".
sub generate_data {
    my ($version) = @_;
    my $prefix = "$dir/nameprep.$version";

    generate_mapdata($version, "$prefix.map");
    generate_prohibiteddata($version, "$prefix.prohibited");
    generate_unassigneddata($version, "$prefix.unassigned");
    generate_bididata($version, "$prefix.bidi");
}
88
#
# Generate mapping data.
#
# Reads the mapping definition file $file for nameprep version $version and
# prints the resulting C tables.
#
# If the first line has the form "% SAME-AS <ver>" and <ver> is one of the
# versions given on the command line, only #define aliases to that version's
# tables are printed (generate_map_ref) and the rest of the file is skipped.
# Otherwise the directive line is ignored and every line that is neither a
# "#" comment nor blank is passed to register_map().
#
# Dies if $file cannot be opened.
#
sub generate_mapdata {
    my ($version, $file) = @_;

    my $map = SparseMap::Int->new(BITS => [@map_bits],
                                  MAX => 0x110000,
                                  MAPALL => 1,
                                  DEFAULT => 0);

    # Three-argument open with a lexical handle: the file name can never be
    # taken for a mode or pipe specification.
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    # Offset 0 of the buffer is occupied by a dummy byte; presumably so that
    # offset 0 (the map's DEFAULT) never denotes a real entry -- TODO confirm.
    my $mapbuf = "\0";
    my %maphash = ();

    while (<$fh>) {
        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            # grep's list must be @versions itself.  Writing
            # "@versions > 0" here would make grep iterate over the single
            # result of that comparison instead of the version names.
            if (grep { $_ eq $same_as } @versions) {
                generate_map_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if /^\#/;
        next if /^\s*$/;
        register_map($map, \$mapbuf, \%maphash, $_);
    }
    close $fh;
    generate_map($version, $map, \$mapbuf);
}
121
#
# Generate prohibited character data.
#
# Reads the prohibited-character definition file $file for nameprep version
# $version and prints the resulting C tables.
#
# If the first line has the form "% SAME-AS <ver>" and <ver> is one of the
# versions given on the command line, only #define aliases to that version's
# tables are printed (generate_prohibited_ref) and the rest of the file is
# skipped.  Otherwise the directive line is ignored and every line that is
# neither a "#" comment nor blank is passed to register_prohibited().
#
# Dies if $file cannot be opened.
#
sub generate_prohibiteddata {
    my ($version, $file) = @_;

    my $proh = SparseMap::Bit->new(BITS => [@proh_bits],
                                   MAX => 0x110000);

    # Three-argument open with a lexical handle: the file name can never be
    # taken for a mode or pipe specification.
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    # The current line is kept in $_ and handed on unchanged.
    while (<$fh>) {
        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            # grep's list must be @versions itself.  Writing
            # "@versions > 0" here would make grep iterate over the single
            # result of that comparison instead of the version names.
            if (grep { $_ eq $same_as } @versions) {
                generate_prohibited_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if /^\#/;
        next if /^\s*$/;
        register_prohibited($proh, $_);
    }
    close $fh;
    generate_prohibited($version, $proh);
}
149
#
# Generate unassigned codepoint data.
#
# Reads the unassigned-codepoint definition file $file for nameprep version
# $version and prints the resulting C tables.
#
# If the first line has the form "% SAME-AS <ver>" and <ver> is one of the
# versions given on the command line, only #define aliases to that version's
# tables are printed (generate_unassigned_ref) and the rest of the file is
# skipped.  Otherwise the directive line is ignored and every line that is
# neither a "#" comment nor blank is passed to register_unassigned().
#
# Dies if $file cannot be opened.
#
sub generate_unassigneddata {
    my ($version, $file) = @_;

    my $unas = SparseMap::Bit->new(BITS => [@unas_bits],
                                   MAX => 0x110000);

    # Three-argument open with a lexical handle: the file name can never be
    # taken for a mode or pipe specification.
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    # The current line is kept in $_ and handed on unchanged.
    while (<$fh>) {
        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            # grep's list must be @versions itself.  Writing
            # "@versions > 0" here would make grep iterate over the single
            # result of that comparison instead of the version names.
            if (grep { $_ eq $same_as } @versions) {
                generate_unassigned_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if /^\#/;
        next if /^\s*$/;
        register_unassigned($unas, $_);
    }
    close $fh;
    generate_unassigned($version, $unas);
}
177
#
# Generate data of bidi "R" or "AL" characters.
#
# Reads the bidi definition file $file for nameprep version $version and
# prints the resulting C tables.
#
# If the first line has the form "% SAME-AS <ver>" and <ver> is one of the
# versions given on the command line, only #define aliases to that version's
# bidi tables are printed (generate_bidi_ref) and the rest of the file is
# skipped.  Otherwise the directive line is ignored.
#
# A line "% BIDI_TYPE <name>" selects the type (an index into @bidi_types)
# recorded for the code points on the following lines; the type is 0 until
# the first such line.  Every other line that is neither a "#" comment nor
# blank is passed to register_bidi() together with the current type.
#
# Dies if $file cannot be opened or a BIDI_TYPE name is not in @bidi_types.
#
sub generate_bididata {
    my ($version, $file) = @_;

    my $bidi = SparseMap::Int->new(BITS => [@bidi_bits],
                                   MAX => 0x110000);

    # Three-argument open with a lexical handle: the file name can never be
    # taken for a mode or pipe specification.
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    my $type = 0;

    # The current line is kept in $_ and handed on unchanged.
    while (<$fh>) {
        if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            # grep's list must be @versions itself.  Writing
            # "@versions > 0" here would make grep iterate over the single
            # result of that comparison instead of the version names.
            if (grep { $_ eq $same_as } @versions) {
                # Alias the bidi tables, not the unassigned ones.
                generate_bidi_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        if (/^%\s*BIDI_TYPE\s+(\S+)$/) {
            my $type_name = $1;
            my $i;
            for ($i = 0; $i < @bidi_types; $i++) {
                last if $type_name eq $bidi_types[$i];
            }
            # Running off the end of the list means the name is unknown.
            die "unrecognized line: $_" if $i >= @bidi_types;
            $type = $i;
            next;
        }
        next if /^\#/;
        next if /^\s*$/;
        register_bidi($bidi, $type, $_);
    }
    close $fh;

    generate_bidi($version, $bidi);
}
219
# Print the comment banner that opens the generated C file: an Id keyword
# placeholder, the identification string of this script ($myid), and a
# "do not edit" notice, followed by one blank line.
sub print_header {
    # The dollars around "Id" stay backslash-escaped in this source, exactly
    # as in the text they replace; the printed text has plain dollars.
    my @banner = (
        "/* \$Id\$ */",
        "/* $myid */",
        "/*",
        " * Do not edit this file!",
        " * This file is generated from NAMEPREP specification.",
        " */",
        "",
    );
    print join("\n", @banner) . "\n";
}
231
# Print one "#define <name>_BITS_<n>\t<value>" line per element of the bit
# list (n counts from 0), followed by a blank line.
#
#   $name - macro prefix, e.g. "MAP"
#   @bits - bit widths, one per level
sub bits_definition {
    my ($name, @bits) = @_;

    foreach my $level (0 .. $#bits) {
        print "#define ${name}_BITS_$level\t$bits[$level]\n";
    }
    print "\n";
}
243
# Record one mapping line of the form "<from>; <to ...>; ..." .
#
#   $map     - map object; receives add(<from code point>, <offset>)
#   $bufref  - reference to the byte buffer holding all mapping targets
#   $hashref - reference to a hash from packed target string to its offset
#              in the buffer, so identical targets are stored only once
#   $line    - the input line
#
# The target code points are packed as 32-bit little-endian values with
# trailing zero bytes removed; a new target is appended to the buffer as a
# one-byte length followed by the packed bytes.  Dies with
# "unrecognized line: ..." unless the <from> field holds exactly one code
# point (the check runs after the buffer has been updated).
sub register_map {
    my ($map, $bufref, $hashref, $line) = @_;

    my ($src_field, $dst_field) = split /;/, $line;
    my @sources = map { hex($_) } split(' ', $src_field);
    my @targets = map { hex($_) } split(' ', $dst_field);

    my $packed = pack('V*', @targets);
    $packed =~ s/\000+$//;

    # Append the target to the buffer only the first time it is seen.
    unless (exists $hashref->{$packed}) {
        $hashref->{$packed} = length $$bufref;
        $$bufref .= pack('C', length($packed)) . $packed;
    }
    my $offset = $hashref->{$packed};

    die "unrecognized line: $line" unless @sources == 1;
    $map->add($sources[0], $offset);
}
266
# Print the C code for a completed mapping table.
#
#   $version - nameprep version, used in the generated symbol names
#   $map     - map object; fix() is called on it before cprog()
#   $bufref  - reference to the byte buffer of mapping targets, printed as
#              the initialiser of nameprep_<version>_map_data[]
sub generate_map {
    my ($version, $map, $bufref) = @_;
    my $symbol = "nameprep_${version}_map";

    $map->fix();

    print $map->cprog(NAME => $symbol);
    print "\nstatic const unsigned char ${symbol}_data[] = {\n";
    print_uchararray($$bufref);
    print "};\n\n";
}
277
# Print #define lines that make the map symbols of $version (imap, table,
# data) aliases of the corresponding symbols of $refversion, followed by a
# blank line.
sub generate_map_ref {
    my ($version, $refversion) = @_;
    my $alias  = "nameprep_${version}_map";
    my $target = "nameprep_${refversion}_map";

    foreach my $suffix (qw(imap table data)) {
        print "#define ${alias}_$suffix\t${target}_$suffix\n";
    }
    print "\n";
}
287
# Print the bytes of the string argument as decimal C array elements
# ("%3d, " each), twelve per line, every line starting with a tab.  The
# output always ends with a newline; an empty string prints just a newline.
sub print_uchararray {
    my @bytes = unpack 'C*', $_[0];

    my @rows;
    while (@bytes) {
        my @row = splice @bytes, 0, 12;
        push @rows, "\t" . join('', map { sprintf "%3d, ", $_ } @row);
    }
    print join("\n", @rows) . "\n";
}
301
# Record one line of prohibited-character data in the bit map $proh by
# forwarding all arguments to register_bitmap().
sub register_prohibited {
    my ($proh, @rest) = @_;
    register_bitmap($proh, @rest);
}
306
# Record one line of unassigned-codepoint data in the bit map $unas by
# forwarding all arguments to register_bitmap().
sub register_unassigned {
    my ($unas, @rest) = @_;
    register_bitmap($unas, @rest);
}
311
# Record one line of bidi data in the integer map $bidi with the given type
# value by forwarding all arguments to register_intmap().
sub register_bidi {
    my ($bidi, $type, @rest) = @_;
    register_intmap($bidi, $type, @rest);
}
317
# Print the C code for the prohibited-character bit map of $version under
# the name nameprep_<version>_prohibited, followed by a blank line.
sub generate_prohibited {
    my ($version, $proh) = @_;
    my $symbol = "nameprep_${version}_prohibited";

    generate_bitmap($proh, $symbol);
    print "\n";
}
323
# Print #define lines that make the prohibited-table symbols of $version
# (imap, bitmap) aliases of the corresponding symbols of $refversion,
# followed by a blank line.
sub generate_prohibited_ref {
    my ($version, $refversion) = @_;
    my $alias  = "nameprep_${version}_prohibited";
    my $target = "nameprep_${refversion}_prohibited";

    foreach my $suffix (qw(imap bitmap)) {
        print "#define ${alias}_$suffix\t${target}_$suffix\n";
    }
    print "\n";
}
332
# Print the C code for the unassigned-codepoint bit map of $version under
# the name nameprep_<version>_unassigned, followed by a blank line.
sub generate_unassigned {
    my ($version, $unas) = @_;
    my $symbol = "nameprep_${version}_unassigned";

    generate_bitmap($unas, $symbol);
    print "\n";
}
338
# Print #define lines that make the unassigned-table symbols of $version
# (imap, bitmap) aliases of the corresponding symbols of $refversion,
# followed by a blank line.
sub generate_unassigned_ref {
    my ($version, $refversion) = @_;
    my $alias  = "nameprep_${version}_unassigned";
    my $target = "nameprep_${refversion}_unassigned";

    foreach my $suffix (qw(imap bitmap)) {
        print "#define ${alias}_$suffix\t${target}_$suffix\n";
    }
    print "\n";
}
347
# Print the C code for a completed bidi table.
#
#   $version - nameprep version, used in the generated symbol names
#   $bidi    - map object; fix() is called on it before cprog()
#
# After the map itself, an array nameprep_<version>_bidi_data[] is printed
# with one idn_biditype_<lower-cased name> entry per element of @bidi_types,
# in list order.
sub generate_bidi {
    my ($version, $bidi) = @_;
    my $symbol = "nameprep_${version}_bidi";

    $bidi->fix();

    print $bidi->cprog(NAME => $symbol);
    print "\n";
    print "static const unsigned char ${symbol}_data[] = {\n";
    print map { sprintf "\tidn_biditype_%s, \n", lc($_) } @bidi_types;
    print "};\n\n";
}
362
# Print #define lines that make the bidi symbols of $version aliases of the
# corresponding symbols of $refversion, followed by a blank line.
#
# All three symbols that generate_bidi() emits for a version are aliased:
# the _bidi_imap and _bidi_table produced by cprog() and the _bidi_data
# array, so an aliased version exposes the same set of names as one that
# was generated in full (as generate_map_ref does for the map symbols).
sub generate_bidi_ref {
    my ($version, $refversion) = @_;
    my $alias  = "nameprep_${version}_bidi";
    my $target = "nameprep_${refversion}_bidi";

    foreach my $suffix (qw(imap table data)) {
        print "#define ${alias}_$suffix\t${target}_$suffix\n";
    }
    print "\n";
}
371
# Add the code point(s) named at the start of $line to the bit map $map.
#
#   $map  - map object; receives add(<code point>, ...)
#   $line - input line beginning with a hexadecimal code point, optionally
#           followed by "-" and a second hexadecimal code point that ends an
#           inclusive range; anything after that is ignored
#
# Dies with "unrecognized line: ..." if $line does not start that way.
sub register_bitmap {
    my ($map, $line) = @_;

    # Match the argument itself rather than whatever $_ currently holds.
    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
        or die "unrecognized line: $line";
    my ($first, $last) = ($1, $2);

    my $start = hex($first);
    if (defined $last) {
        $map->add($start .. hex($last));
    } else {
        $map->add($start);
    }
}
385
# Store $value for the code point(s) named at the start of $line in $map.
#
#   $map   - map object; receives add(<code point>, $value) once per code
#            point
#   $value - value to associate with every code point of the line
#   $line  - input line beginning with a hexadecimal code point, optionally
#            followed by "-" and a second hexadecimal code point that ends
#            an inclusive range; anything after that is ignored
#
# Dies with "unrecognized line: ..." if $line does not start that way.
sub register_intmap {
    my ($map, $value, $line) = @_;

    # Match the argument itself rather than whatever $_ currently holds.
    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
        or die "unrecognized line: $line";
    my ($first, $last) = ($1, $2);

    my $start = hex($first);
    my $end = defined($last) ? hex($last) : $start;
    for (my $i = $start; $i <= $end; $i++) {
        $map->add($i, $value);
    }
}
398
# Finalise the map with fix() and print the C code that its cprog() method
# returns for the symbol name $name.
sub generate_bitmap {
    my ($map, $name) = @_;

    $map->fix();
    #$map->stat();
    print $map->cprog(NAME => $name);
}
406