1293125Sdelphij#! /usr/bin/perl
2293125Sdelphijuse strict;
3293125Sdelphij
4293125Sdelphijmy $USAGE = <<__EOF__;
5293125Sdelphij   usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
6293125Sdelphij          -n = take non-matching types
7330571Sdelphij      -f = zero-based type field (default 2)
8293125Sdelphij__EOF__
9293125Sdelphij
10293125Sdelphijuse vars qw( $opt_f $opt_n );
11293125Sdelphijuse Getopt::Std;
12293125Sdelphijmy $type_field = 2;
13293125Sdelphij
14330571Sdelphij# Override Unicode tables for certain control chars
15330571Sdelphij# that are expected to be found in normal text files.
16330571Sdelphijmy %force_space = (
17330571Sdelphij    0x08 => 1, # backspace
18330571Sdelphij    0x09 => 1, # tab
19330571Sdelphij    0x0a => 1, # newline
20330571Sdelphij    0x0c => 1, # form feed
21330571Sdelphij    0x0d => 1, # carriage return
22330571Sdelphij);
23293125Sdelphij
24330571Sdelphijexit (main() ? 0 : 1);
25330571Sdelphij
26293125Sdelphijsub main {
27293125Sdelphij    my $date = `date`;
28293125Sdelphij    chomp $date;
29293125Sdelphij    my $args = join ' ', @ARGV;
30293125Sdelphij    my $header = "/* Generated by \"$0 $args\" on $date */\n";
31293125Sdelphij
32293125Sdelphij    die $USAGE if not getopts('f:n');
33293125Sdelphij    $type_field = $opt_f if $opt_f;
34293125Sdelphij    my %types;
35293125Sdelphij    my $arg;
36293125Sdelphij    while ($arg = shift @ARGV) {
37293125Sdelphij        last if $arg eq '--';
38293125Sdelphij        $types{$arg} = 1;
39293125Sdelphij    }
40293125Sdelphij    my %out = ( 'types' => \%types );
41293125Sdelphij
42293125Sdelphij    print $header;
43330571Sdelphij    my $last_code = 0;
44293125Sdelphij    while (<>) {
45293125Sdelphij        chomp;
46293125Sdelphij        s/#.*//;
47293125Sdelphij        my @fields = split /;/;
48293125Sdelphij        next if not @fields;
49330571Sdelphij        my ($lo_code, $hi_code);
50330571Sdelphij        my $codes = $fields[0];
51330571Sdelphij        if ($codes =~ /(\w+)\.\.(\w+)/) {
52330571Sdelphij            $lo_code = hex $1;
53330571Sdelphij            $hi_code = hex $2;
54330571Sdelphij        } else {
55330571Sdelphij            $lo_code = $hi_code = hex $fields[0];
56330571Sdelphij        }
57293125Sdelphij        my $type = $fields[$type_field];
58293125Sdelphij        $type =~ s/\s//g;
59330571Sdelphij        for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) {
60330571Sdelphij            $type = 'Zs' if $force_space{$last_code};
61330571Sdelphij            output(\%out, $last_code, $type);
62293125Sdelphij        }
63293125Sdelphij    }
64330571Sdelphij    output(\%out, $last_code);
65330571Sdelphij    return 1;
66293125Sdelphij}
67293125Sdelphij
68293125Sdelphijsub output {
69293125Sdelphij    my ($out, $code, $type) = @_;
70330571Sdelphij    my $type_ok = ($type and ${${$out}{types}}{$type});
71330571Sdelphij    $type_ok = not $type_ok if $opt_n;
72330571Sdelphij    my $prev_code = $$out{prev_code};
73330571Sdelphij
74330571Sdelphij    if (not $type_ok) {
75330571Sdelphij        end_run($out, $prev_code);
76330571Sdelphij    } elsif (not $$out{in_run} or $type ne $$out{run_type} or $code != $prev_code+1) {
77330571Sdelphij        end_run($out, $prev_code);
78293125Sdelphij        start_run($out, $code, $type);
79293125Sdelphij    }
80330571Sdelphij    $$out{prev_code} = $code;
81293125Sdelphij}
82293125Sdelphij
83293125Sdelphijsub start_run {
84293125Sdelphij    my ($out, $code, $type) = @_;
85293125Sdelphij    $$out{start_code} = $code;
86330571Sdelphij    $$out{prev_code} = $code;
87330571Sdelphij    $$out{run_type} = $type;
88293125Sdelphij    $$out{in_run} = 1;
89293125Sdelphij}
90293125Sdelphij
91293125Sdelphijsub end_run {
92293125Sdelphij    my ($out, $code) = @_;
93293125Sdelphij    return if not $$out{in_run};
94330571Sdelphij    printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{run_type};
95293125Sdelphij    $$out{in_run} = 0;
96293125Sdelphij}
97