#!/usr/bin/env perl

# Ascetic x86_64 AT&T to MASM assembler translator by <appro>.
#
# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T
# format is way easier to parse. Because it's simpler to "gear" from
# Unix ABI to Windows one [see cross-reference "card" at the end of
# file]. Because Linux targets were available first...
#
# In addition the script also "distills" code suitable for GNU
# assembler, so that it can be compiled with more rigid assemblers,
# such as Solaris /usr/ccs/bin/as.
#
# This translator is not designed to convert *arbitrary* assembler
# code from AT&T format to MASM one. It's designed to convert just
# enough to provide for dual-ABI OpenSSL modules development...
# There *are* limitations and you might have to modify your assembler
# code or this script to achieve the desired result...
#
# Currently recognized limitations:
#
# - can't use multiple ops per line;
# - indirect calls and jumps are not supported;
#
# Dual-ABI styling rules.
#
# 1. Adhere to Unix register and stack layout [see the end for
#    explanation].
# 2. Forget about "red zone," stick to more traditional blended
#    stack frame allocation. If volatile storage is actually required
#    that is. If not, just leave the stack as is.
# 3. Functions tagged with ".type name,@function" get crafted with
#    unified Win64 prologue and epilogue automatically. If you want
#    to take care of ABI differences yourself, tag functions as
#    ".type name,@abi-omnipotent" instead.
# 4. To optimize the Win64 prologue you can specify number of input
#    arguments as ".type name,@function,N." Keep in mind that if N is
#    larger than 6, then you *have to* write "abi-omnipotent" code,
#    because >6 cases can't be addressed with unified prologue.
# 5. Name local labels as .L*, do *not* use dynamic labels such as 1:
#    (sorry about latter).
# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is
#    required to identify the spots, where to inject Win64 epilogue!
#    But on the pros, it's then prefixed with rep automatically:-)
# 7. Due to MASM limitations [and certain general counter-intuitivity
#    of ip-relative addressing] generation of position-independent
#    code is assisted by synthetic directive, .picmeup, which puts
#    address of the *next* instruction into target register.
#
#    Example 1:
#		.picmeup	%rax
#		lea		.Label-.(%rax),%rax
#    Example 2:
#		.picmeup	%rcx
#	.Lpic_point:
#		...
#		lea		.Label-.Lpic_point(%rcx),%rbp
#
# Implementation note: the parser packages below are invoked as class
# methods (e.g. opcode->re(...)), so inside them $self is a package-name
# string and $self->{...} is a symbolic reference. The script therefore
# must NOT be run under "use strict".

# Usage: <this script> output-file [input-file ...]
# Input is read from the files named after the output file, or from
# standard input if there are none.
my $output = shift;

# Low-precedence "or" so that a failed open is actually reported; with "||"
# the die was attached to the (always true) file name string.
# The two-argument form is retained for compatibility with existing callers.
open STDOUT,">$output" or die "can't open $output: $!";

# MASM output is selected by an output name containing ".asm"; anything
# else produces "distilled" GNU assembler output.
my $masm;
$masm = 1 if ($output =~ /\.asm/);

# State shared by all parser packages (file-scoped lexicals).
my $current_segment;    # name of the MASM segment currently open
my $current_function;   # hash ref {name, abi, narg} of the function being emitted

{ package opcode;	# pick up opcodes

    # Try to consume a mnemonic from the start of the referenced line.
    # On success the mnemonic is removed from the line, an explicit size
    # suffix (q/l/w/b) is split off into {sz}, and $self is returned;
    # otherwise undef is returned and the line is left untouched.
    sub re {
        my $self = shift;	# single instance is enough...
        local *line = shift;	# alias $line to the caller's buffer
        my $ret;

        if ($line =~ /^([a-z]+)/i) {
            $self->{op} = $1;
            $ret = $self;
            $line = substr($line,$+[0]); $line =~ s/^\s+//;

            undef $self->{sz};
            if ($self->{op} =~ /(movz)b.*/) {	# movz is pain...
                $self->{op} = $1;
                $self->{sz} = "b";
            } elsif ($self->{op} =~ /call/) {
                # "call" ends in 'l' but carries no size suffix; without
                # this case it would be split into "cal" + "l".
                $self->{sz} = "";
            } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])/) {
                $self->{op} = $1;
                $self->{sz} = $2;
            }
        }
        return $ret;
    }

    # Get the operand size; an optional argument sets it, but only if the
    # mnemonic did not already carry an explicit suffix.
    sub size {
        my $self = shift;
        my $sz   = shift;
        $self->{sz} = $sz if (defined($sz) && !defined($self->{sz}));
        return $self->{sz};
    }

    # Render the mnemonic for the selected output flavour. In GNU mode the
    # optional argument is the destination size appended to "movz".
    sub out {
        my $self = shift;

        if (!$masm) {
            if ($self->{op} eq "movz") {	# movz is pain...
                return sprintf "%s%s%s",$self->{op},$self->{sz},shift;
            } elsif ($self->{op} eq "ret") {
                return ".byte	0xf3,0xc3";	# rep ret
            }
            return "$self->{op}$self->{sz}";
        }

        $self->{op} =~ s/movz/movzx/;
        if ($self->{op} eq "ret") {
            $self->{op} = "";
            # Functions with the unified prologue restore rdi/rsi here.
            if ($current_function->{abi} eq "svr4") {
                $self->{op} = "mov rdi,QWORD PTR 8[rsp]\t;WIN64 epilogue\n\t".
                              "mov rsi,QWORD PTR 16[rsp]\n\t";
            }
            $self->{op} .= "DB\t0F3h,0C3h\t\t;repret";
        }
        return $self->{op};
    }
}
{ package const;	# pick up constants, which start with $

    # Consume "$value" (everything up to the next comma) from the line.
    sub re {
        my $self = shift;	# single instance is enough...
        local *line = shift;
        my $ret;

        if ($line =~ /^\$([^,]+)/) {
            $self->{value} = $1;
            $ret = $self;
            $line = substr($line,$+[0]); $line =~ s/^\s+//;
        }
        return $ret;
    }

    # GNU: re-attach the '$' prefix. MASM: bare value, 0x.. rewritten as 0..h.
    sub out {
        my $self = shift;

        if (!$masm) {
            return sprintf "\$%s",$self->{value};
        }
        $self->{value} =~ s/0x([0-9a-f]+)/0$1h/ig;
        return sprintf "%s",$self->{value};
    }
}
{ package ea;		# pick up effective addresses: expr(%reg,%reg,scale)

    # Consume "label(base[,index[,scale]])" from the line; scale defaults
    # to 1 and the leading '%' is stripped from the register names.
    sub re {
        my $self = shift;	# single instance is enough...
        local *line = shift;
        my $ret;

        if ($line =~ /^([^\(,]*)\(([%\w,]+)\)/) {
            $self->{label} = $1;
            ($self->{base},$self->{index},$self->{scale})=split(/,/,$2);
            $self->{scale} = 1 if (!defined($self->{scale}));
            $ret = $self;
            $line = substr($line,$+[0]); $line =~ s/^\s+//;

            $self->{base}  =~ s/^%//;
            $self->{index} =~ s/^%// if (defined($self->{index}));
        }
        return $ret;
    }

    # Memory operands carry no size of their own.
    sub size {}

    # Render the address; $sz (b/w/l/q) selects the MASM "xxx PTR" keyword.
    sub out {
        my $self = shift;
        my $sz   = shift;

        if (!$masm) {
            # elder GNU assembler insists on 64-bit EAs:-(
            # on pros side, this results in more compact code:-)
            $self->{index} =~ s/^[er](.?[0-9xp])[d]?$/r$1/;
            $self->{base}  =~ s/^[er](.?[0-9xp])[d]?$/r$1/;
            # Solaris /usr/ccs/bin/as can't handle multiplications
            # in $self->{label}, so constants are folded here
            $self->{label} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/eg;
            $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;

            if (defined($self->{index})) {
                return sprintf "%s(%%%s,%%%s,%d)",
                               $self->{label},$self->{base},
                               $self->{index},$self->{scale};
            }
            return sprintf "%s(%%%s)", $self->{label},$self->{base};
        }

        my %szmap = ( b=>"BYTE", w=>"WORD", l=>"DWORD", q=>"QWORD" );

        $self->{label} =~ s/\./\$/g;
        $self->{label} =~ s/0x([0-9a-f]+)/0$1h/ig;
        $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/);

        if (defined($self->{index})) {
            return sprintf "%s PTR %s[%s*%d+%s]",$szmap{$sz},
                           $self->{label},
                           $self->{index},$self->{scale},
                           $self->{base};
        }
        return sprintf "%s PTR %s[%s]",$szmap{$sz},
                       $self->{label},$self->{base};
    }
}
{ package register;	# pick up registers, which start with %.

    # Consume "%name" from the line and return a new blessed object for it
    # (source and destination may both be registers, hence real instances).
    sub re {
        my $class = shift;	# multiple instances...
        my $self  = {};
        local *line = shift;
        my $ret;

        if ($line =~ /^%(\w+)/) {
            bless $self,$class;
            $self->{value} = $1;
            $ret = $self;
            $line = substr($line,$+[0]); $line =~ s/^\s+//;
        }
        return $ret;
    }

    # Derive the operand size letter (b/w/l/q) from the register name;
    # returns undef for names that match none of the patterns.
    sub size {
        my $self = shift;
        my $ret;

        if    ($self->{value} =~ /^r[\d]+b$/i)     { $ret="b"; }
        elsif ($self->{value} =~ /^r[\d]+w$/i)     { $ret="w"; }
        elsif ($self->{value} =~ /^r[\d]+d$/i)     { $ret="l"; }
        elsif ($self->{value} =~ /^r[\w]+$/i)      { $ret="q"; }
        elsif ($self->{value} =~ /^[a-d][hl]$/i)   { $ret="b"; }
        elsif ($self->{value} =~ /^[\w]{2}l$/i)    { $ret="b"; }
        elsif ($self->{value} =~ /^[\w]{2}$/i)     { $ret="w"; }
        elsif ($self->{value} =~ /^e[a-z]{2}$/i)   { $ret="l"; }

        return $ret;
    }

    # GNU keeps the '%' prefix, MASM drops it.
    sub out {
        my $self = shift;
        return sprintf $masm?"%s":"%%%s",$self->{value};
    }
}
{ package label;	# pick up labels, which end with :

    # Consume "name:" from the start of the line; for MASM a ".L" local
    # label prefix becomes "$L".
    sub re {
        my $self = shift;	# single instance is enough...
        local *line = shift;
        my $ret;

        if ($line =~ /(^[\.\w]+\:)/) {
            $self->{value} = $1;
            $ret = $self;
            $line = substr($line,$+[0]); $line =~ s/^\s+//;

            $self->{value} =~ s/\.L/\$L/ if ($masm);
        }
        return $ret;
    }

    # Render the label. In MASM mode the label naming the current function
    # becomes a PROC header, followed by the unified Win64 prologue when
    # the function was declared with @function.
    sub out {
        my $self = shift;

        return $self->{value} if (!$masm);
        return $self->{value}
            if ($self->{value} ne "$current_function->{name}:");

        if ($current_function->{abi} eq "svr4") {
            my $func = "$current_function->{name} PROC\n".
                       " mov QWORD PTR 8[rsp],rdi\t;WIN64 prologue\n".
                       " mov QWORD PTR 16[rsp],rsi\n";
            # Without an explicit argument count, move all six.
            my $narg = $current_function->{narg};
            $narg=6 if (!defined($narg));
            $func .= " mov rdi,rcx\n" if ($narg>0);
            $func .= " mov rsi,rdx\n" if ($narg>1);
            $func .= " mov rdx,r8\n" if ($narg>2);
            $func .= " mov rcx,r9\n" if ($narg>3);
            $func .= " mov r8,QWORD PTR 40[rsp]\n" if ($narg>4);
            $func .= " mov r9,QWORD PTR 48[rsp]\n" if ($narg>5);
            $func .= "\n";
            return $func;
        }
        return "$current_function->{name} PROC";
    }
}
{ package expr;		# pick up expressions

    # Consume everything up to the next comma as an opaque expression;
    # for MASM every ".L" becomes "$L".
    sub re {
        my $self = shift;	# single instance is enough...
        local *line = shift;
        my $ret;

        if ($line =~ /(^[^,]+)/) {
            $self->{value} = $1;
            $ret = $self;
            $line = substr($line,$+[0]); $line =~ s/^\s+//;

            $self->{value} =~ s/\.L/\$L/g if ($masm);
        }
        return $ret;
    }

    sub out {
        my $self = shift;
        return $self->{value};
    }
}
{ package directive;	# pick up directives, which start with .

    # Numeric value of one data operand. Literals with a 0x/0b prefix or a
    # leading zero go through oct(); plain decimal literals are taken as
    # decimal (feeding them to oct() would read ".long 16" as 14). Anything
    # else keeps the former oct() treatment.
    sub _value {
        my $text = shift;
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        return ($text =~ /^[1-9][0-9]*$/) ? $text+0 : oct($text);
    }

    # Consume a whole directive line. GNU mode passes it through (after
    # normalising the .type tag and expanding .picmeup); MASM mode maps
    # the directive to its MASM counterpart and updates the segment and
    # function state. Returns $self on a match, undef otherwise.
    sub re {
        my $self = shift;	# single instance is enough...
        local *line = shift;
        my $ret;
        my %picmeup_code =	# lea 2f-1f(%rip),%dst; 1: nop; 2:
            (	"%rax"=>0x01058d48,	"%rcx"=>0x010d8d48,
                "%rdx"=>0x01158d48,	"%rbx"=>0x011d8d48,
                "%rsp"=>0x01258d48,	"%rbp"=>0x012d8d48,
                "%rsi"=>0x01358d48,	"%rdi"=>0x013d8d48,
                "%r8" =>0x01058d4c,	"%r9" =>0x010d8d4c,
                "%r10"=>0x01158d4c,	"%r11"=>0x011d8d4c,
                "%r12"=>0x01258d4c,	"%r13"=>0x012d8d4c,
                "%r14"=>0x01358d4c,	"%r15"=>0x013d8d4c	);

        if ($line =~ /^\s*(\.\w+)/) {
            my $dir = $1;

            if (!$masm) {
                $line =~ s/\@abi\-omnipotent/\@function/;
                $line =~ s/\@function.*/\@function/;
                if ($line =~ /\.picmeup\s+(%r[\w]+)/i) {
                    # table keys are lower case, the match is not
                    $self->{value} = sprintf "\t.long\t0x%x,0x90000000",
                                             $picmeup_code{lc($1)};
                } else {
                    $self->{value} = $line;
                }
                $line = "";
                return $self;
            }

            $ret = $self;
            undef $self->{value};
            $line = substr($line,$+[0]); $line =~ s/^\s+//;

            if ($dir =~ /\.(text)/) {
                # close the previous segment, if any, and open the new one
                my $v=undef;
                $v="$current_segment\tENDS\n" if ($current_segment);
                $current_segment = "_$1\$";
                $current_segment =~ tr/a-z/A-Z/;
                $v.="$current_segment\tSEGMENT ALIGN(64) 'CODE'";
                $self->{value} = $v;
            } elsif ($dir =~ /\.globl/) {
                $self->{value} = "PUBLIC\t".$line;
            } elsif ($dir =~ /\.type/) {
                my ($sym,$type,$narg) = split(',',$line);
                if ($type eq "\@function") {
                    undef $current_function;
                    $current_function->{name} = $sym;
                    $current_function->{abi}  = "svr4";
                    $current_function->{narg} = $narg;
                } elsif ($type eq "\@abi-omnipotent") {
                    undef $current_function;
                    $current_function->{name} = $sym;
                }
            } elsif ($dir =~ /\.size/) {
                if (defined($current_function)) {
                    $self->{value}="$current_function->{name}\tENDP";
                    undef $current_function;
                }
            } elsif ($dir =~ /\.align/) {
                $self->{value} = "ALIGN\t".$line;
            } elsif ($dir =~ /\.(byte|value|long|quad)/) {
                # first letter of the directive selects DB/DW/DD/DQ
                my $sz = substr($1,0,1);
                my @arr = split(',',$line);
                my $last = pop(@arr);

                $sz =~ tr/bvlq/BWDQ/;
                $self->{value} = "\tD$sz\t";
                for my $item (@arr) {
                    $self->{value} .= sprintf"0%Xh,",_value($item);
                }
                $self->{value} .= sprintf"0%Xh",_value($last);
            } elsif ($dir =~ /\.picmeup/) {
                # Look up only the register token, so that trailing blanks
                # left behind by a stripped comment don't defeat the lookup.
                my ($reg) = ($line =~ /^(%\w+)/);
                $self->{value} = sprintf"\tDD\t 0%Xh,090000000h",
                                        $picmeup_code{lc($reg)};
            }
            $line = "";
        }

        return $ret;
    }

    sub out {
        my $self = shift;
        return $self->{value};
    }
}

# Main loop: translate the input one line at a time. Each parser consumes
# what it recognises from the front of $line; whatever is left over is
# printed verbatim at the end of the output line.
while($line=<>) {

    chomp($line);

    $line =~ s|[#!].*$||;	# get rid of asm-style comments...
    $line =~ s|/\*.*\*/||;	# ... and C-style comments...
    $line =~ s|^\s+||;		# ... and skip white spaces in beginning

    my ($label,$opcode,$dst,$src,$sz);

    if ($label=label->re(\$line)) { print $label->out(); }

    if (directive->re(\$line)) {
        printf "%s",directive->out();
    } elsif ($opcode=opcode->re(\$line)) {
      ARGUMENT: {
        # first operand: a register operand also fixes the opcode size
        if    ($src=register->re(\$line)) { opcode->size($src->size()); }
        elsif ($src=const->re(\$line))    { }
        elsif ($src=ea->re(\$line))       { }
        elsif ($src=expr->re(\$line))     { }

        last ARGUMENT if ($line !~ /^,/);

        $line = substr($line,1); $line =~ s/^\s+//;

        # second operand
        if    ($dst=register->re(\$line)) { opcode->size($dst->size()); }
        elsif ($dst=const->re(\$line))    { }
        elsif ($dst=ea->re(\$line))       { }

      } # ARGUMENT:

        $sz=opcode->size();

        if (defined($dst)) {
            if (!$masm) {
                # AT&T order: source, destination
                printf "\t%s\t%s,%s", $opcode->out($dst->size()),
                                      $src->out($sz),$dst->out($sz);
            } else {
                # MASM order: destination, source
                printf "\t%s\t%s,%s", $opcode->out(),
                                      $dst->out($sz),$src->out($sz);
            }
        } elsif (defined($src)) {
            printf "\t%s\t%s",$opcode->out(),$src->out($sz);
        } else {
            printf "\t%s",$opcode->out();
        }
    }

    print $line,"\n";
}

print "\n$current_segment\tENDS\nEND\n" if ($masm);

# Buffered write errors only surface at close time, so check it.
close STDOUT or die "error closing $output: $!";

#################################################
# Cross-reference x86_64 ABI "card"
#
#		Unix		Win64
# %rax		*		*
# %rbx		-		-
# %rcx		#4		#1
# %rdx		#3		#2
# %rsi		#2		-
# %rdi		#1		-
# %rbp		-		-
# %rsp		-		-
# %r8		#5		#3
# %r9		#6		#4
# %r10		*		*
# %r11		*		*
# %r12		-		-
# %r13		-		-
# %r14		-		-
# %r15		-		-
#
# (*)	volatile register
# (-)	preserved by callee
# (#)	Nth argument, volatile
#
# In Unix terms top of stack is argument transfer area for arguments
# which could not be accommodated in registers. Or in other words 7th
# [integer] argument resides at 8(%rsp) upon function entry point.
# 128 bytes above %rsp constitute a "red zone" which is not touched
# by signal handlers and can be used as temporal storage without
# allocating a frame.
#
# In Win64 terms N*8 bytes on top of stack is argument transfer area,
# which belongs to/can be overwritten by callee. N is the number of
# arguments passed to callee, *but* not less than 4! This means that
# upon function entry point 5th argument resides at 40(%rsp), as well
# as that 32 bytes from 8(%rsp) can always be used as temporal
# storage [without allocating a frame].
#
# All the above means that if assembler programmer adheres to Unix
# register and stack layout, but disregards the "red zone" existence,
# it's possible to use following prologue and epilogue to "gear" from
# Unix to Win64 ABI in leaf functions with not more than 6 arguments.
#
# omnipotent_function:
# ifdef WIN64
#	movq	%rdi,8(%rsp)
#	movq	%rsi,16(%rsp)
#	movq	%rcx,%rdi	; if 1st argument is actually present
#	movq	%rdx,%rsi	; if 2nd argument is actually ...
#	movq	%r8,%rdx	; if 3rd argument is ...
#	movq	%r9,%rcx	; if 4th argument ...
#	movq	40(%rsp),%r8	; if 5th ...
#	movq	48(%rsp),%r9	; if 6th ...
# endif
#	...
# ifdef WIN64
#	movq	8(%rsp),%rdi
#	movq	16(%rsp),%rsi
# endif
#	ret