#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Std;

# Set to a true value to trace every parsed record on stdout.
my $DEBUG = 0;

# Set to a true value by getopts('d') when -d is given on the command line.
our $opt_d;

# Return a copy of $text with leading and trailing whitespace removed.
sub _trim {
    my ($text) = @_;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}

# Turn gdb's "0xNN 0xNN ..." byte list into the "nn nn ..." form used for
# objdump records, so the two can be compared with a plain string compare.
sub _normalize_gdb_bytes {
    my ($bytes) = @_;
    $bytes =~ s/0x//g;
    $bytes =~ s/\s+/ /g;    # regularize whitespace
    return _trim($bytes);
}

# Convert a "0x..." address string to its numeric value.  Plain numification
# (0 + "0x1234") yields 0 in Perl, so hex() is required here.
sub _addr_value {
    my ($addr) = @_;
    no warnings 'portable';    # addresses wider than 32 bits are expected
    return hex $addr;
}

# Parse objdump disassembly output.
#
# Recognized lines have the form: optional whitespace, a hex address, a
# colon, a tab, one or more space-terminated two-digit hex bytes, a tab, and
# the instruction text.  All other lines are ignored.
#
# Returns a list of hash references with keys 'addr' ("0x"-prefixed string),
# 'bytes' (space-separated hex pairs) and 'instr' (instruction text).
# Dies if the file cannot be opened.
sub parse_objdump_file {
    my ($filename) = @_;
    my @result;
    open(my $input, '<', $filename) or die "$filename: $!\n";
    print "opened objdump output file $filename\n" if $DEBUG;
    while (my $line = <$input>) {
        next unless $line =~ /\s*([0-9a-f]*):\t((?:[0-9a-f]{2} )+) *\t(.*)$/;
        my ($addr, $bytes, $instr) = ($1, $2, $3);
        $addr  = "0x" . $addr;
        $bytes = _trim($bytes);
        $instr = _trim($instr);
        push @result, { 'addr' => $addr, 'bytes' => $bytes, 'instr' => $instr };
        print "addr=$addr bytes='$bytes' instr='$instr'\n" if $DEBUG;
    }
    close $input;
    return @result;
}

# Parse gdb "disassemble" output that includes the instruction bytes.
#
# Two layouts are recognized:
#   * a single line: address, colon, tab, instruction, tab, a field ending
#     in a colon, tab, then the bytes as "0xNN" tokens;
#   * gdb's line-broken form: a line holding only "address:<tab>", followed
#     by a line starting with a space that carries the remaining fields.
#
# Returns the same record structure as parse_objdump_file.
# Dies if the file cannot be opened.
sub parse_gdb_file {
    my ($filename) = @_;
    my @result;
    my $pending_addr;    # address seen on its own line, awaiting its instruction
    open(my $input, '<', $filename) or die "$filename: $!\n";
    print "opened gdb output file $filename\n" if $DEBUG;
    while (my $line = <$input>) {
        my ($addr, $instr, $bytes);
        if ($line =~ /^(0x[0-9a-f]*):\t([^\t]*)\t[^:]*:\t((?:0x[0-9a-f]{2}\s*)+)\s*$/) {
            ($addr, $instr, $bytes) = ($1, $2, $3);
        }
        elsif ($line =~ /^(0x[0-9a-f]*):\t$/) {    # deal with gdb's line breaker
            $pending_addr = $1;
            next;
        }
        elsif (defined $pending_addr
            && $line =~ /^ ([^\t]*)\t[^:]*:\t((?:0x[0-9a-f]{2}\s*)+)\s*$/)
        {
            # Any further leading blanks end up in the instruction text and
            # are removed by _trim below.
            ($addr, $instr, $bytes) = ($pending_addr, $1, $2);
            undef $pending_addr;
        }
        else {
            next;
        }
        $bytes = _normalize_gdb_bytes($bytes);
        $instr = _trim($instr);
        push @result, { 'addr' => $addr, 'bytes' => $bytes, 'instr' => $instr };
        print "addr=$addr bytes='$bytes' instr='$instr'\n" if $DEBUG;
    }
    close $input;
    return @result;
}

# Print one line of a mismatch stanza, followed by $terminator.
sub _print_record {
    my ($record, $terminator) = @_;
    printf "0x%08x:\t%30s \t%s%s", _addr_value($record->{'addr'}),
        $record->{'bytes'}, $record->{'instr'}, $terminator;
    return;
}

# Compare the two disassemblies record by record and print a two-line stanza
# (objdump version first, gdb version second) for every position where the
# instruction bytes differ.  With -d ($opt_d), positions whose disassembled
# text is identical are skipped even if the bytes differ.
#
# If one file yields fewer records than the other, the absent side is shown
# as an empty record marked "(no instruction)".
sub binary_diffs {
    my ($objdump_file, $gdb_file) = @_;
    my @file1 = parse_objdump_file($objdump_file);
    my @file2 = parse_gdb_file($gdb_file);
    my $lastrecord = ($#file1 >= $#file2) ? $#file1 : $#file2;
    my %absent = ('addr' => '0x0', 'bytes' => '', 'instr' => '(no instruction)');
    for my $i (0 .. $lastrecord) {
        my $d1 = $file1[$i] || \%absent;
        my $d2 = $file2[$i] || \%absent;
        next if $d1->{'bytes'} eq $d2->{'bytes'};
        next if $opt_d && $d1->{'instr'} eq $d2->{'instr'};
        _print_record($d1, "\n");
        _print_record($d2, "\n\n");
    }
    return;
}

my $usage = "usage: $0 [-d] OBJDUMP-OUTPUT-FILE GDB-DISASSEMBLY-FILE\n";
getopts('d') or die $usage;
@ARGV >= 2 or die $usage;
my ($objdump_file, $gdb_file) = @ARGV;
binary_diffs($objdump_file, $gdb_file);
exit(0);
__END__
=pod

=head1 NAME

codegen-diff

=head1 SYNOPSIS

codegen-diff [-d] I<OBJDUMP-OUTPUT-FILE> I<GDB-DISASSEMBLY-FILE>

=head1 DESCRIPTION

B<codegen-diff> is a program that tries to show you the differences
between the code that B<llc> generated and the code that B<lli> generated.

The way you use it is as follows: first, you create I<OBJDUMP-OUTPUT-FILE>
by running B<objdump> on the B<llc> compiled and linked binary. You need to
trim down the result so it contains only the function of interest.

Second, you create I<GDB-DISASSEMBLY-FILE> by running B<gdb>, with my patch
to print out hex bytes in the B<disassemble> command output, on
B<lli>. Set a breakpoint in C<Emitter::finishFunction()> and wait until
the function you want is compiled. Then use the B<disassemble> command
to print out the assembly dump of the function B<lli> just compiled.
(Use C<lli -debug> to find out where the function starts and ends in memory.)
It's easiest to save this output by using B<script>.

Finally, you run B<codegen-diff>, as indicated in the Synopsis section of
this manpage. It will print out a two-line stanza for each mismatched
instruction, with the B<llc> version first, and the B<lli> version second.

=head1 OPTIONS

=over 4

=item -d

Don't show instructions where the bytes are different but they
disassemble to the same thing. This puts a lot of trust in the
disassembler, but it might help you highlight the more egregious cases
of misassembly.

=back

=head1 AUTHOR

B<codegen-diff> was written by Brian Gaeke.

=head1 SEE ALSO

L<gdb(1)>, L<objdump(1)>, L<script(1)>.

You will need my B<gdb> patch:

  http://llvm.cs.uiuc.edu/~gaeke/gdb-disassembly-print-bytes.patch

=cut