1293125Sdelphij#! /usr/bin/perl 2293125Sdelphijuse strict; 3293125Sdelphij 4293125Sdelphijmy $USAGE = <<__EOF__; 5293125Sdelphij usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt 6293125Sdelphij -n = take non-matching types 7330571Sdelphij -f = zero-based type field (default 2) 8293125Sdelphij__EOF__ 9293125Sdelphij 10293125Sdelphijuse vars qw( $opt_f $opt_n ); 11293125Sdelphijuse Getopt::Std; 12293125Sdelphijmy $type_field = 2; 13293125Sdelphij 14330571Sdelphij# Override Unicode tables for certain control chars 15330571Sdelphij# that are expected to be found in normal text files. 16330571Sdelphijmy %force_space = ( 17330571Sdelphij 0x08 => 1, # backspace 18330571Sdelphij 0x09 => 1, # tab 19330571Sdelphij 0x0a => 1, # newline 20330571Sdelphij 0x0c => 1, # form feed 21330571Sdelphij 0x0d => 1, # carriage return 22330571Sdelphij); 23293125Sdelphij 24330571Sdelphijexit (main() ? 0 : 1); 25330571Sdelphij 26293125Sdelphijsub main { 27293125Sdelphij my $date = `date`; 28293125Sdelphij chomp $date; 29293125Sdelphij my $args = join ' ', @ARGV; 30293125Sdelphij my $header = "/* Generated by \"$0 $args\" on $date */\n"; 31293125Sdelphij 32293125Sdelphij die $USAGE if not getopts('f:n'); 33293125Sdelphij $type_field = $opt_f if $opt_f; 34293125Sdelphij my %types; 35293125Sdelphij my $arg; 36293125Sdelphij while ($arg = shift @ARGV) { 37293125Sdelphij last if $arg eq '--'; 38293125Sdelphij $types{$arg} = 1; 39293125Sdelphij } 40293125Sdelphij my %out = ( 'types' => \%types ); 41293125Sdelphij 42293125Sdelphij print $header; 43330571Sdelphij my $last_code = 0; 44293125Sdelphij while (<>) { 45293125Sdelphij chomp; 46293125Sdelphij s/#.*//; 47293125Sdelphij my @fields = split /;/; 48293125Sdelphij next if not @fields; 49330571Sdelphij my ($lo_code, $hi_code); 50330571Sdelphij my $codes = $fields[0]; 51330571Sdelphij if ($codes =~ /(\w+)\.\.(\w+)/) { 52330571Sdelphij $lo_code = hex $1; 53330571Sdelphij $hi_code = hex $2; 54330571Sdelphij } else { 55330571Sdelphij $lo_code = $hi_code = hex $fields[0]; 56330571Sdelphij } 57293125Sdelphij my $type = $fields[$type_field]; 58293125Sdelphij $type =~ s/\s//g; 59330571Sdelphij for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) { 60330571Sdelphij $type = 'Zs' if $force_space{$last_code}; 61330571Sdelphij output(\%out, $last_code, $type); 62293125Sdelphij } 63293125Sdelphij } 64330571Sdelphij output(\%out, $last_code); 65330571Sdelphij return 1; 66293125Sdelphij} 67293125Sdelphij 68293125Sdelphijsub output { 69293125Sdelphij my ($out, $code, $type) = @_; 70330571Sdelphij my $type_ok = ($type and ${${$out}{types}}{$type}); 71330571Sdelphij $type_ok = not $type_ok if $opt_n; 72330571Sdelphij my $prev_code = $$out{prev_code}; 73330571Sdelphij 74330571Sdelphij if (not $type_ok) { 75330571Sdelphij end_run($out, $prev_code); 76330571Sdelphij } elsif (not $$out{in_run} or $type ne $$out{run_type} or $code != $prev_code+1) { 77330571Sdelphij end_run($out, $prev_code); 78293125Sdelphij start_run($out, $code, $type); 79293125Sdelphij } 80330571Sdelphij $$out{prev_code} = $code; 81293125Sdelphij} 82293125Sdelphij 83293125Sdelphijsub start_run { 84293125Sdelphij my ($out, $code, $type) = @_; 85293125Sdelphij $$out{start_code} = $code; 86330571Sdelphij $$out{prev_code} = $code; 87330571Sdelphij $$out{run_type} = $type; 88293125Sdelphij $$out{in_run} = 1; 89293125Sdelphij} 90293125Sdelphij 91293125Sdelphijsub end_run { 92293125Sdelphij my ($out, $code) = @_; 93293125Sdelphij return if not $$out{in_run}; 94330571Sdelphij printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{run_type}; 95293125Sdelphij $$out{in_run} = 0; 96293125Sdelphij} 97