1#!/usr/bin/perl
2# $FreeBSD$
3
4use Text::Iconv;
5use Encode;
6use strict;
7use utf8;
8
9# command line parsing
10die "Usage: $0 filename.kbd charset [EURO|YEN]\n"
11    unless ($ARGV[1]);
12
13my $inputfile = shift;					# first command argument
14my $converter = Text::Iconv->new(shift, "UTF-8");	# second argument
15my $use_euro;
16my $use_yen;
17my $current_char;
18my $current_scancode;
19
20while (my $arg = shift) {
21    $use_euro = 1, next
22	if $arg eq "EURO";
23    $use_yen = 1, next
24	if $arg eq "YEN";
25    die "Unknown encoding option '$arg'\n";
26}
27
28# converter functions
29sub local_to_UCS_string
30{
31    my ($string) = @_;
32
33    return $converter->convert($string);
34}
35
36sub prettyprint_token
37{
38    my ($ucs_char) = @_;
39
40    return "'" . chr($ucs_char) . "'"
41        if 32 <= $ucs_char and $ucs_char <= 126; # print as ASCII if possible
42#    return sprintf "%d", $ucs_char; # <---- temporary decimal
43    return sprintf "0x%02x", $ucs_char
44        if $ucs_char <= 255;        # print as hex number, else
45    return sprintf "0x%04x", $ucs_char;
46}
47
48sub local_to_UCS_code
49{
50    my ($char) = @_;
51
52    my $ucs_char = ord(Encode::decode("UTF-8", local_to_UCS_string($char)));
53
54    $current_char = lc(chr($ucs_char))
55	if $current_char eq "";
56
57    $ucs_char = 0x20ac	# replace with Euro character
58	if $ucs_char == 0xa4 and $use_euro and $current_char eq "e";
59
60    $ucs_char = 0xa5	# replace with Jap. Yen character on PC kbd
61	if $ucs_char == ord('\\') and $use_yen and $current_scancode == 125;
62
63#    $ucs_char = 0xa5	# replace with Jap. Yen character on PC98x1 kbd
64#	if $ucs_char == ord('\\') and $use_yen and $current_scancode == 13;
65
66    return prettyprint_token($ucs_char);
67}
68
69sub malformed_to_UCS_code
70{
71    my ($char) = @_;
72
73    return prettyprint_token(ord(Encode::decode("UTF-8", $char)));
74}
75
76sub convert_token
77{
78    my ($C) = @_;
79
80    return $1
81        if $C =~ m/^([a-z][a-z0-9]*)$/;		# key token
82    return local_to_UCS_code(chr($1))
83        if $C =~ m/^(\d+)$/;			# decimal number
84    return local_to_UCS_code(chr(hex($1)))
85        if $C =~ m/^0x([0-9a-f]+)$/i;		# hex number
86    return local_to_UCS_code(chr(ord($1)))
87        if $C =~ m/^'(.)'$/;			# character
88    return malformed_to_UCS_code($1)
89        if $C =~ m/^'(.+)'$/;			# character
90    return "<?$C?>";				# uncovered case
91}
92
93sub tokenize { # split on white space and parentheses (but not within token)
94    my ($line) = @_;
95
96    $line =~ s/'\('/ _lpar_ /g; # prevent splitting of '('
97    $line =~ s/'\)'/ _rpar_ /g; # prevent splitting of ')'
98    $line =~ s/'''/'_squote_'/g; # remove quoted single quotes from matches below
99    $line =~ s/([()])/ $1 /g; # insert blanks around remaining parentheses
100    my $matches;
101    do {
102	$matches = ($line =~ s/^([^']*)'([^']+)'/$1_squoteL_$2_squoteR_/g);
103    } while $matches;
104    $line =~ s/_squoteL_ _squoteR_/ _spc_ /g; # prevent splitting of ' '
105    my @KEYTOKEN = split (" ", $line);
106    grep(s/_squote[LR]?_/'/g, @KEYTOKEN);
107    grep(s/_spc_/' '/, @KEYTOKEN);
108    grep(s/_lpar_/'('/, @KEYTOKEN);
109    grep(s/_rpar_/')'/, @KEYTOKEN);
110    return @KEYTOKEN;
111}
112
113# main program
114open FH, "<$inputfile";
115while (<FH>) {
116    if (m/^#/) {
117	print local_to_UCS_string($_);
118    } elsif (m/^\s*$/) {
119	print "\n";
120    } else {
121	my @KEYTOKEN = tokenize($_);
122	my $at_bol = 1;
123	my $C;
124	foreach $C (@KEYTOKEN) {
125	    if ($at_bol) {
126		$current_char = "";
127		$current_scancode = -1;
128		if ($C =~ m/^\s*\d/) { # line begins with key code number
129		    $current_scancode = $C;
130		    printf "  %03d   ", $C;
131		} elsif ($C =~ m/^[a-z]/) { # line begins with accent name or paren
132		    printf "  %-4s ", $C; # accent name starts accent definition
133		} elsif ($C eq "(") {
134		    printf "%17s", "( "; # paren continues accent definition
135		} else {
136		    print "Unknown input line format: $_";
137		}
138		$at_bol = 0;
139	    } else {
140		if ($C =~ m/^([BCNO])$/) {
141		    print " $1"; # special case: effect of Caps Lock/Num Lock
142		} elsif ($C eq "(") {
143		    $current_char = "";
144		    print " ( ";
145		} elsif ($C eq ")") {
146		    print " )";
147		} else {
148		    printf "%-6s ", convert_token($C);
149		}
150	    }
151	}
152	print "\n";
153    }
154}
155close FH;
156