1#!/usr/bin/perl 2# $FreeBSD$ 3 4use Text::Iconv; 5use Encode; 6use strict; 7use utf8; 8 9# command line parsing 10die "Usage: $0 filename.kbd charset [EURO|YEN]\n" 11 unless ($ARGV[1]); 12 13my $inputfile = shift; # first command argument 14my $converter = Text::Iconv->new(shift, "UTF-8"); # second argument 15my $use_euro; 16my $use_yen; 17my $current_char; 18my $current_scancode; 19 20while (my $arg = shift) { 21 $use_euro = 1, next 22 if $arg eq "EURO"; 23 $use_yen = 1, next 24 if $arg eq "YEN"; 25 die "Unknown encoding option '$arg'\n"; 26} 27 28# converter functions 29sub local_to_UCS_string 30{ 31 my ($string) = @_; 32 33 return $converter->convert($string); 34} 35 36sub prettyprint_token 37{ 38 my ($ucs_char) = @_; 39 40 return "'" . chr($ucs_char) . "'" 41 if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible 42# return sprintf "%d", $ucs_char; # <---- temporary decimal 43 return sprintf "0x%02x", $ucs_char 44 if $ucs_char <= 255; # print as hex number, else 45 return sprintf "0x%04x", $ucs_char; 46} 47 48sub local_to_UCS_code 49{ 50 my ($char) = @_; 51 52 my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char))); 53 54 $current_char = lc(chr($ucs_char)) 55 if $current_char eq ""; 56 57 $ucs_char = 0x20ac # replace with Euro character 58 if $ucs_char == 0xa4 and $use_euro and $current_char eq "e"; 59 60 $ucs_char = 0xa5 # replace with Jap. Yen character on PC kbd 61 if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125; 62 63# $ucs_char = 0xa5 # replace with Jap. Yen character on PC98x1 kbd 64# if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13; 65 66 return prettyprint_token($ucs_char); 67} 68 69sub malformed_to_UCS_code 70{ 71 my ($char) = @_; 72 73 return prettyprint_token(ord(Encode::decode("UTF-8", $char))); 74} 75 76sub convert_token 77{ 78 my ($C) = @_; 79 80 return $1 81 if $C =~ m/^([a-z][a-z0-9]*)$/; # key token 82 return local_to_UCS_code(chr($1)) 83 if $C =~ m/^(\d+)$/; # decimal number 84 return local_to_UCS_code(chr(hex($1))) 85 if $C =~ m/^0x([0-9a-f]+)$/i; # hex number 86 return local_to_UCS_code(chr(ord($1))) 87 if $C =~ m/^'(.)'$/; # character 88 return malformed_to_UCS_code($1) 89 if $C =~ m/^'(.+)'$/; # character 90 return "<?$C?>"; # uncovered case 91} 92 93sub tokenize { # split on white space and parentheses (but not within token) 94 my ($line) = @_; 95 96 $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '(' 97 $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')' 98 $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below 99 $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses 100 my $matches; 101 do { 102 $matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g); 103 } while $matches; 104 $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' ' 105 my @KEYTOKEN = split (" ", $line); 106 grep(s/_squote[LR]?_/'/g, @KEYTOKEN); 107 grep(s/_spc_/' '/, @KEYTOKEN); 108 grep(s/_lpar_/'('/, @KEYTOKEN); 109 grep(s/_rpar_/')'/, @KEYTOKEN); 110 return @KEYTOKEN; 111} 112 113# main program 114open FH, "<$inputfile"; 115while (<FH>) { 116 if (m/^#/) { 117 print local_to_UCS_string($_); 118 } elsif (m/^\s*$/) { 119 print "\n"; 120 } else { 121 my @KEYTOKEN = tokenize($_); 122 my $at_bol = 1; 123 my $C; 124 foreach $C (@KEYTOKEN) { 125 if ($at_bol) { 126 $current_char = ""; 127 $current_scancode = -1; 128 if ($C =~ m/^\s*\d/) { # line begins with key code number 129 $current_scancode = $C; 130 printf " %03d ", $C; 131 } elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren 132 printf " %-4s ", $C; # accent name starts accent definition 133 } elsif ($C eq "(") { 134 printf "%17s", "( "; # paren continues accent definition 135 } else { 136 print "Unknown input line format: $_"; 137 } 138 $at_bol = 0; 139 } else { 140 if ($C =~ m/^([BCNO])$/) { 141 print " $1"; # special case: effect of Caps Lock/Num Lock 142 } elsif ($C eq "(") { 143 $current_char = ""; 144 print " ( "; 145 } elsif ($C eq ")") { 146 print " )"; 147 } else { 148 printf "%-6s ", convert_token($C); 149 } 150 } 151 } 152 print "\n"; 153 } 154} 155close FH; 156