1#!./perl
2
3BEGIN { pop @INC if $INC[-1] eq '.' }
4use strict;
5use Encode;
6use Getopt::Std;
7my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt);
8$Opt{p} ||= $Opt{P};
9$Opt{e} ||= 'utf8';
10$Opt{f} ||= $Opt{e};
11$Opt{t} ||= $Opt{e};
12$Opt{h} and help();
13
14my ($linebuf, $outbuf);
15my $CPL = $Opt{p} ? 64 : 8;
16my $linenum;
17my $linesperheading = $Opt{H};
18my $nchars;
19our $PrevChunk;
20
21$Opt{h} and help();
22$Opt{p} and do_perl($Opt{s});
23do_dump($Opt{s});
24exit;
25
26#
27
28sub do_perl{
29    my $string = shift;
30    $Opt{P} and print "#!$^X -w\nprint\n";
31    unless ($string){
32    while(<>){
33        use utf8;
34        $linebuf .=  Encode::decode($Opt{f}, $_);
35        while($linebuf){
36        my $chr =  render_p(substr($linebuf, 0, 1, ''));
37        length($outbuf) + length($chr) > $CPL and print_P();
38        $outbuf .= $chr;
39        }
40    }
41    $outbuf and print print_P(";");
42    }else{
43    while($string){
44        my $chr =  render_p(substr($string, 0, 1, ''));
45        length($outbuf) + length($chr) > $CPL and print_P();
46        $outbuf .= $chr;
47    }
48    }
49    $outbuf and print print_P(";");
50    exit;
51}
52
53sub render_p{
54    my ($chr, $format) = @_;
55    our %S2pstr;
56    $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n...
57    $chr =~ /[\x20-\x7e]/ and return $chr;  # ascii, printable;
58    my $fmt = ($chr =~ /[\x00-\x1f\x7F]/)  ?
59    q(\x%x) : q(\x{%x});
60    return sprintf $fmt, ord($chr);
61}
62
63sub print_P{
64    my $end = shift;
65    $outbuf or return;
66    print '"', encode($Opt{t}, $outbuf), '"';
67    my $tail = $Opt{P} ? $end ? "$end" :  "," : '';
68    print $tail, "\n";
69    $outbuf = '';
70}
71
72sub do_dump{
73    my $string = shift;
74    !$Opt{p} and exists $Opt{H} and print_H();
75    unless ($string){
76    while(<>){
77        use utf8;
78        $linebuf .=  Encode::decode($Opt{f}, $_);
79        while (length($linebuf) > $CPL){
80        my $chunk = substr($linebuf, 0, $CPL, '');
81        print_C($chunk, $linenum++);
82        $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
83        }
84    }
85    $linebuf and print_C($linebuf);
86    }else{
87    while ($string){
88        my $chunk = substr($string, 0, $CPL, '');
89        print_C($chunk, $linenum++);
90        $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
91    }
92    }
93    exit;
94}
95
96sub print_S{
97    print "--------+------------------------------------------------";
98    if ($Opt{C}){
99    print "-+-----------------";
100    }
101    print "\n";
102}
103sub print_H{
104    print "  Offset      0     1     2     3     4     5     6     7";
105    if ($Opt{C}){
106    print " |  0 1 2 3 4 5 6 7";
107    }
108    print "\n";
109    print_S;
110}
111
112sub print_C{
113    my ($chunk, $linenum) = @_;
114    if (!$Opt{v} and $chunk eq $PrevChunk){
115    printf "%08x *\n", $linenum*8; return;
116    }
117    $PrevChunk = $chunk;
118    my $end = length($chunk) - 1;
119    my (@ord, @chr);
120    for my $i (0..$end){
121    use utf8;
122    my $chr = substr($chunk,$i,1);
123    my $ord = ord($chr);
124    my $fmt = $ord <= 0xffff ? "  %04x" : " %05x";
125    push @ord, (sprintf $fmt, $ord);
126    $Opt{C} and push @chr, render_c($chr);
127    }
128    if (++$end < 7){
129    for my $i ($end..7){
130        push @ord, (" " x 6);
131    }
132    }
133    my $line = sprintf "%08x %s", $linenum*8, join('', @ord);
134    $Opt{C} and $line .= sprintf " | %s",  join('', @chr);
135    print encode($Opt{t}, $line), "\n";
136}
137
138sub render_c{
139    my ($chr, $format) = @_;
140    our (%S2str, $IsFullWidth);
141    $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || "  ";
142    $chr =~ $IsFullWidth and return $chr; # as is
143    return " " . $chr;
144}
145
146sub help{
147    my $message = shift;
148    use File::Basename;
149    my $name = basename($0);
150    $message and print STDERR "$name error: $message\n";
151    print STDERR <<"EOT";
152Usage:
153  $name -[options...] [files...]
154  $name -[options...] -s "string"
155  $name -h
156  -h prints this message.
157Inherited from hexdump;
158  -C Canonical unidump mode
159  -v prints the duplicate line as is.  Without this option,
160     single "*" will be printed instead.
161For unidump only
162  -p prints in perl literals that you can copy and paste directly
163     to your perl script.
164  -P prints in perl executable format!
165  -u prints a bunch of "Uxxxx,".  Handy when you want to pass your
166     characters in mailing lists. 
167IO Options:
168  -e io_encoding    same as "-f io_encoding -t io_encoding"
169  -f from_encoding  convert the source stream from this encoding
170  -t to_encoding    print to STDOUT in this encoding
171  -s string         "string" will be converted instead of STDIN.
172  -H nline          prints separater for each nlines of output.
173                    0 means only the table headding be printed.
174EOT
175  exit;
176}
177
178BEGIN{
179    our %S2pstr= (
180          "\\" => '\\\\',
181          "\0" => '\0',
182          "\t" => '\t',
183          "\n" => '\n',
184          "\r" => '\r',
185          "\v" => '\v',
186          "\a" => '\a',
187          "\e" => '\e',
188          "\"" => qq(\\\"),
189          "\'" => qq(\\\'),
190          '$'  => '\$',
191          "@"  => '\@',
192          "%"  => '\%',
193         );
194
195    our %S2str = (
196          qq(\x00) => q(\0),  # NULL
197          qq(\x01) => q(^A),  # START OF HEADING
198          qq(\x02) => q(^B),  # START OF TEXT
199          qq(\x03) => q(^C),  # END OF TEXT
200          qq(\x04) => q(^D),  # END OF TRANSMISSION
201          qq(\x05) => q(^E),  # ENQUIRY
202          qq(\x06) => q(^F),  # ACKNOWLEDGE
203          qq(\x07) => q(\a),  # BELL
204          qq(\x08) => q(^H),  # BACKSPACE
205          qq(\x09) => q(\t),  # HORIZONTAL TABULATION
206          qq(\x0A) => q(\n),  # LINE FEED
207          qq(\x0B) => q(\v),  # VERTICAL TABULATION
208          qq(\x0C) => q(^L),  # FORM FEED
209          qq(\x0D) => q(\r),  # CARRIAGE RETURN
210          qq(\x0E) => q(^N),  # SHIFT OUT
211          qq(\x0F) => q(^O),  # SHIFT IN
212          qq(\x10) => q(^P),  # DATA LINK ESCAPE
213          qq(\x11) => q(^Q),  # DEVICE CONTROL ONE
214          qq(\x12) => q(^R),  # DEVICE CONTROL TWO
215          qq(\x13) => q(^S),  # DEVICE CONTROL THREE
216          qq(\x14) => q(^T),  # DEVICE CONTROL FOUR
217          qq(\x15) => q(^U),  # NEGATIVE ACKNOWLEDGE
218          qq(\x16) => q(^V),  # SYNCHRONOUS IDLE
219          qq(\x17) => q(^W),  # END OF TRANSMISSION BLOCK
220          qq(\x18) => q(^X),  # CANCEL
221          qq(\x19) => q(^Y),  # END OF MEDIUM
222          qq(\x1A) => q(^Z),  # SUBSTITUTE
223          qq(\x1B) => q(\e),  # ESCAPE (\c[)
224          qq(\x1C) => "^\\",  # FILE SEPARATOR
225          qq(\x1D) => "^\]",  # GROUP SEPARATOR
226          qq(\x1E) => q(^^),  # RECORD SEPARATOR
227          qq(\x1F) => q(^_),  # UNIT SEPARATOR
228          );
229    #
230    # Generated out of lib/unicore/EastAsianWidth.txt 
231    # will it work ?
232    #		  
233    our $IsFullWidth = 
234    qr/^[
235         \x{1100}-\x{1159}
236         \x{115F}-\x{115F}
237         \x{2329}-\x{232A}
238         \x{2E80}-\x{2E99}
239         \x{2E9B}-\x{2EF3}
240         \x{2F00}-\x{2FD5}
241         \x{2FF0}-\x{2FFB}
242         \x{3000}-\x{303E}
243         \x{3041}-\x{3096}
244         \x{3099}-\x{30FF}
245         \x{3105}-\x{312C}
246         \x{3131}-\x{318E}
247         \x{3190}-\x{31B7}
248         \x{31F0}-\x{321C}
249         \x{3220}-\x{3243}
250         \x{3251}-\x{327B}
251         \x{327F}-\x{32CB}
252         \x{32D0}-\x{32FE}
253         \x{3300}-\x{3376}
254         \x{337B}-\x{33DD}
255         \x{3400}-\x{4DB5}
256         \x{4E00}-\x{9FA5}
257         \x{33E0}-\x{33FE}
258         \x{A000}-\x{A48C}
259         \x{AC00}-\x{D7A3}
260         \x{A490}-\x{A4C6}
261         \x{F900}-\x{FA2D}
262         \x{FA30}-\x{FA6A}
263         \x{FE30}-\x{FE46}
264         \x{FE49}-\x{FE52}
265         \x{FE54}-\x{FE66}
266         \x{FE68}-\x{FE6B}
267         \x{FF01}-\x{FF60}
268         \x{FFE0}-\x{FFE6}
269         \x{20000}-\x{2A6D6}
270     ]$/xo;
271}
272
273__END__
274