mkutable revision 294286
1110603Sphk#! /usr/bin/perl
2110603Sphkuse strict;
3110603Sphk
4110603Sphkmy $USAGE = <<__EOF__;
5110603Sphk   usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
6110603Sphk          -n = take non-matching types
7110603Sphk	  -f = zero-based type field (default 2)
8110603Sphk__EOF__
9110603Sphk
10110603Sphkuse vars qw( $opt_f $opt_n );
11110603Sphkuse Getopt::Std;
12110603Sphkmy $type_field = 2;
13110603Sphk
14110603Sphkexit (main() ? 1 : 0);
15110603Sphk
16110603Sphksub main {
17110603Sphk    my $date = `date`;
18110603Sphk    chomp $date;
19110603Sphk    my $args = join ' ', @ARGV;
20110603Sphk    my $header = "/* Generated by \"$0 $args\" on $date */\n";
21110603Sphk
22110603Sphk    die $USAGE if not getopts('f:n');
23110603Sphk    $type_field = $opt_f if $opt_f;
24110603Sphk    my %types;
25110603Sphk    my $arg;
26110603Sphk    while ($arg = shift @ARGV) {
27110603Sphk        last if $arg eq '--';
28110603Sphk        $types{$arg} = 1;
29110603Sphk    }
30110603Sphk    my %out = ( 'types' => \%types );
31110603Sphk    my $last_code = 0;
32110603Sphk
33110603Sphk    print $header;
34110603Sphk    while (<>) {
35110603Sphk        chomp;
36110603Sphk        s/#.*//;
37110603Sphk        my @fields = split /;/;
38110603Sphk        next if not @fields;
39201320Sed        my $code = hex $fields[0];
40110603Sphk        my $type = $fields[$type_field];
41110603Sphk        $type =~ s/\s//g;
42180369Slulf        while (++$last_code < $code) {
43180369Slulf            output(\%out, $last_code, '?');
44180369Slulf        }
45110603Sphk        output(\%out, $code, $type);
46180369Slulf    }
47180369Slulf    output(\%out, $last_code+1, '?');
48180369Slulf}
49180369Slulf
50180369Slulfsub output {
51180369Slulf    my ($out, $code, $type) = @_;
52110603Sphk    my $match = ${${$out}{types}}{$type};
53180369Slulf    my $type_change = (not $$out{start_type} or $type ne $$out{start_type});
54180369Slulf    $match = not $match if $opt_n;
55180369Slulf    if ($match and (not $$out{in_run} or $type_change)) {
56110603Sphk        end_run($out, $code-1);
57110603Sphk        start_run($out, $code, $type);
58110603Sphk    } elsif (not $match and $$out{in_run}) {
59180369Slulf        end_run($out, $code-1);
60110603Sphk    }
61}
62
63sub start_run {
64    my ($out, $code, $type) = @_;
65    $$out{start_code} = $code;
66    $$out{start_type} = $type;
67    $$out{in_run} = 1;
68}
69
70sub end_run {
71    my ($out, $code) = @_;
72    return if not $$out{in_run};
73    printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{start_type};
74    $$out{in_run} = 0;
75}
76